Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 643
1 file changed, 367 insertions(+), 276 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 724efc63904d..3167480b532c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -143,9 +143,19 @@ struct mapped_device { | |||
143 | int barrier_error; | 143 | int barrier_error; |
144 | 144 | ||
145 | /* | 145 | /* |
146 | * Protect barrier_error from concurrent endio processing | ||
147 | * in request-based dm. | ||
148 | */ | ||
149 | spinlock_t barrier_error_lock; | ||
150 | |||
151 | /* | ||
146 | * Processing queue (flush/barriers) | 152 | * Processing queue (flush/barriers) |
147 | */ | 153 | */ |
148 | struct workqueue_struct *wq; | 154 | struct workqueue_struct *wq; |
155 | struct work_struct barrier_work; | ||
156 | |||
157 | /* A pointer to the currently processing pre/post flush request */ | ||
158 | struct request *flush_request; | ||
149 | 159 | ||
150 | /* | 160 | /* |
151 | * The current mapping. | 161 | * The current mapping. |
@@ -178,9 +188,6 @@ struct mapped_device { | |||
178 | /* forced geometry settings */ | 188 | /* forced geometry settings */ |
179 | struct hd_geometry geometry; | 189 | struct hd_geometry geometry; |
180 | 190 | ||
181 | /* marker of flush suspend for request-based dm */ | ||
182 | struct request suspend_rq; | ||
183 | |||
184 | /* For saving the address of __make_request for request based dm */ | 191 | /* For saving the address of __make_request for request based dm */ |
185 | make_request_fn *saved_make_request_fn; | 192 | make_request_fn *saved_make_request_fn; |
186 | 193 | ||
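The two hunks above retool struct mapped_device for request-based barrier support: the flush-suspend marker request (suspend_rq) is dropped, and the device instead carries a barrier error merged under a new spinlock, a work item that processes barriers, and a pointer to the flush request currently being handled. Condensed from the added and removed lines (a sketch of the affected members only, not the whole structure):

struct mapped_device {
	/* ... */
	int barrier_error;

	/*
	 * Protect barrier_error from concurrent endio processing
	 * in request-based dm.
	 */
	spinlock_t barrier_error_lock;

	/*
	 * Processing queue (flush/barriers)
	 */
	struct workqueue_struct *wq;
	struct work_struct barrier_work;

	/* A pointer to the currently processing pre/post flush request */
	struct request *flush_request;
	/* ... */
};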
@@ -275,6 +282,7 @@ static int (*_inits[])(void) __initdata = { | |||
275 | dm_target_init, | 282 | dm_target_init, |
276 | dm_linear_init, | 283 | dm_linear_init, |
277 | dm_stripe_init, | 284 | dm_stripe_init, |
285 | dm_io_init, | ||
278 | dm_kcopyd_init, | 286 | dm_kcopyd_init, |
279 | dm_interface_init, | 287 | dm_interface_init, |
280 | }; | 288 | }; |
@@ -284,6 +292,7 @@ static void (*_exits[])(void) = { | |||
284 | dm_target_exit, | 292 | dm_target_exit, |
285 | dm_linear_exit, | 293 | dm_linear_exit, |
286 | dm_stripe_exit, | 294 | dm_stripe_exit, |
295 | dm_io_exit, | ||
287 | dm_kcopyd_exit, | 296 | dm_kcopyd_exit, |
288 | dm_interface_exit, | 297 | dm_interface_exit, |
289 | }; | 298 | }; |
@@ -320,6 +329,11 @@ static void __exit dm_exit(void) | |||
320 | /* | 329 | /* |
321 | * Block device functions | 330 | * Block device functions |
322 | */ | 331 | */ |
332 | int dm_deleting_md(struct mapped_device *md) | ||
333 | { | ||
334 | return test_bit(DMF_DELETING, &md->flags); | ||
335 | } | ||
336 | |||
323 | static int dm_blk_open(struct block_device *bdev, fmode_t mode) | 337 | static int dm_blk_open(struct block_device *bdev, fmode_t mode) |
324 | { | 338 | { |
325 | struct mapped_device *md; | 339 | struct mapped_device *md; |
@@ -331,7 +345,7 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode) | |||
331 | goto out; | 345 | goto out; |
332 | 346 | ||
333 | if (test_bit(DMF_FREEING, &md->flags) || | 347 | if (test_bit(DMF_FREEING, &md->flags) || |
334 | test_bit(DMF_DELETING, &md->flags)) { | 348 | dm_deleting_md(md)) { |
335 | md = NULL; | 349 | md = NULL; |
336 | goto out; | 350 | goto out; |
337 | } | 351 | } |
@@ -388,7 +402,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, | |||
388 | unsigned int cmd, unsigned long arg) | 402 | unsigned int cmd, unsigned long arg) |
389 | { | 403 | { |
390 | struct mapped_device *md = bdev->bd_disk->private_data; | 404 | struct mapped_device *md = bdev->bd_disk->private_data; |
391 | struct dm_table *map = dm_get_table(md); | 405 | struct dm_table *map = dm_get_live_table(md); |
392 | struct dm_target *tgt; | 406 | struct dm_target *tgt; |
393 | int r = -ENOTTY; | 407 | int r = -ENOTTY; |
394 | 408 | ||
@@ -401,7 +415,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, | |||
401 | 415 | ||
402 | tgt = dm_table_get_target(map, 0); | 416 | tgt = dm_table_get_target(map, 0); |
403 | 417 | ||
404 | if (dm_suspended(md)) { | 418 | if (dm_suspended_md(md)) { |
405 | r = -EAGAIN; | 419 | r = -EAGAIN; |
406 | goto out; | 420 | goto out; |
407 | } | 421 | } |
@@ -430,9 +444,10 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) | |||
430 | mempool_free(tio, md->tio_pool); | 444 | mempool_free(tio, md->tio_pool); |
431 | } | 445 | } |
432 | 446 | ||
433 | static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md) | 447 | static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, |
448 | gfp_t gfp_mask) | ||
434 | { | 449 | { |
435 | return mempool_alloc(md->tio_pool, GFP_ATOMIC); | 450 | return mempool_alloc(md->tio_pool, gfp_mask); |
436 | } | 451 | } |
437 | 452 | ||
438 | static void free_rq_tio(struct dm_rq_target_io *tio) | 453 | static void free_rq_tio(struct dm_rq_target_io *tio) |
@@ -450,6 +465,12 @@ static void free_bio_info(struct dm_rq_clone_bio_info *info) | |||
450 | mempool_free(info, info->tio->md->io_pool); | 465 | mempool_free(info, info->tio->md->io_pool); |
451 | } | 466 | } |
452 | 467 | ||
468 | static int md_in_flight(struct mapped_device *md) | ||
469 | { | ||
470 | return atomic_read(&md->pending[READ]) + | ||
471 | atomic_read(&md->pending[WRITE]); | ||
472 | } | ||
473 | |||
453 | static void start_io_acct(struct dm_io *io) | 474 | static void start_io_acct(struct dm_io *io) |
454 | { | 475 | { |
455 | struct mapped_device *md = io->md; | 476 | struct mapped_device *md = io->md; |
@@ -512,7 +533,7 @@ static void queue_io(struct mapped_device *md, struct bio *bio) | |||
512 | * function to access the md->map field, and make sure they call | 533 | * function to access the md->map field, and make sure they call |
513 | * dm_table_put() when finished. | 534 | * dm_table_put() when finished. |
514 | */ | 535 | */ |
515 | struct dm_table *dm_get_table(struct mapped_device *md) | 536 | struct dm_table *dm_get_live_table(struct mapped_device *md) |
516 | { | 537 | { |
517 | struct dm_table *t; | 538 | struct dm_table *t; |
518 | unsigned long flags; | 539 | unsigned long flags; |
@@ -716,28 +737,38 @@ static void end_clone_bio(struct bio *clone, int error) | |||
716 | blk_update_request(tio->orig, 0, nr_bytes); | 737 | blk_update_request(tio->orig, 0, nr_bytes); |
717 | } | 738 | } |
718 | 739 | ||
740 | static void store_barrier_error(struct mapped_device *md, int error) | ||
741 | { | ||
742 | unsigned long flags; | ||
743 | |||
744 | spin_lock_irqsave(&md->barrier_error_lock, flags); | ||
745 | /* | ||
746 | * Basically, the first error is taken, but: | ||
747 | * -EOPNOTSUPP supersedes any I/O error. | ||
748 | * Requeue request supersedes any I/O error but -EOPNOTSUPP. | ||
749 | */ | ||
750 | if (!md->barrier_error || error == -EOPNOTSUPP || | ||
751 | (md->barrier_error != -EOPNOTSUPP && | ||
752 | error == DM_ENDIO_REQUEUE)) | ||
753 | md->barrier_error = error; | ||
754 | spin_unlock_irqrestore(&md->barrier_error_lock, flags); | ||
755 | } | ||
756 | |||
719 | /* | 757 | /* |
720 | * Don't touch any member of the md after calling this function because | 758 | * Don't touch any member of the md after calling this function because |
721 | * the md may be freed in dm_put() at the end of this function. | 759 | * the md may be freed in dm_put() at the end of this function. |
722 | * Or do dm_get() before calling this function and dm_put() later. | 760 | * Or do dm_get() before calling this function and dm_put() later. |
723 | */ | 761 | */ |
724 | static void rq_completed(struct mapped_device *md, int run_queue) | 762 | static void rq_completed(struct mapped_device *md, int rw, int run_queue) |
725 | { | 763 | { |
726 | int wakeup_waiters = 0; | 764 | atomic_dec(&md->pending[rw]); |
727 | struct request_queue *q = md->queue; | ||
728 | unsigned long flags; | ||
729 | |||
730 | spin_lock_irqsave(q->queue_lock, flags); | ||
731 | if (!queue_in_flight(q)) | ||
732 | wakeup_waiters = 1; | ||
733 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
734 | 765 | ||
735 | /* nudge anyone waiting on suspend queue */ | 766 | /* nudge anyone waiting on suspend queue */ |
736 | if (wakeup_waiters) | 767 | if (!md_in_flight(md)) |
737 | wake_up(&md->wait); | 768 | wake_up(&md->wait); |
738 | 769 | ||
739 | if (run_queue) | 770 | if (run_queue) |
740 | blk_run_queue(q); | 771 | blk_run_queue(md->queue); |
741 | 772 | ||
742 | /* | 773 | /* |
743 | * dm_put() must be at the end of this function. See the comment above | 774 | * dm_put() must be at the end of this function. See the comment above |
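Two points from this hunk are best read together. First, errors from concurrently completing barrier clones are merged under barrier_error_lock with a fixed precedence: -EOPNOTSUPP supersedes any I/O error, a requeue supersedes anything except -EOPNOTSUPP, and otherwise the first error recorded wins. Second, completion accounting moves off the block layer's queue_in_flight() and onto dm's own per-direction md->pending[] counters, so rq_completed() wakes suspend waiters via md_in_flight() instead of peeking at the queue. The precedence rule as introduced above, with the comment spelled out:

static void store_barrier_error(struct mapped_device *md, int error)
{
	unsigned long flags;

	spin_lock_irqsave(&md->barrier_error_lock, flags);
	/*
	 * The first error is kept, except that:
	 *  - -EOPNOTSUPP supersedes any I/O error, and
	 *  - a requeue (DM_ENDIO_REQUEUE) supersedes anything but -EOPNOTSUPP.
	 */
	if (!md->barrier_error || error == -EOPNOTSUPP ||
	    (md->barrier_error != -EOPNOTSUPP &&
	     error == DM_ENDIO_REQUEUE))
		md->barrier_error = error;
	spin_unlock_irqrestore(&md->barrier_error_lock, flags);
}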
@@ -753,6 +784,44 @@ static void free_rq_clone(struct request *clone) | |||
753 | free_rq_tio(tio); | 784 | free_rq_tio(tio); |
754 | } | 785 | } |
755 | 786 | ||
787 | /* | ||
788 | * Complete the clone and the original request. | ||
789 | * Must be called without queue lock. | ||
790 | */ | ||
791 | static void dm_end_request(struct request *clone, int error) | ||
792 | { | ||
793 | int rw = rq_data_dir(clone); | ||
794 | int run_queue = 1; | ||
795 | bool is_barrier = blk_barrier_rq(clone); | ||
796 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
797 | struct mapped_device *md = tio->md; | ||
798 | struct request *rq = tio->orig; | ||
799 | |||
800 | if (blk_pc_request(rq) && !is_barrier) { | ||
801 | rq->errors = clone->errors; | ||
802 | rq->resid_len = clone->resid_len; | ||
803 | |||
804 | if (rq->sense) | ||
805 | /* | ||
806 | * We are using the sense buffer of the original | ||
807 | * request. | ||
808 | * So setting the length of the sense data is enough. | ||
809 | */ | ||
810 | rq->sense_len = clone->sense_len; | ||
811 | } | ||
812 | |||
813 | free_rq_clone(clone); | ||
814 | |||
815 | if (unlikely(is_barrier)) { | ||
816 | if (unlikely(error)) | ||
817 | store_barrier_error(md, error); | ||
818 | run_queue = 0; | ||
819 | } else | ||
820 | blk_end_request_all(rq, error); | ||
821 | |||
822 | rq_completed(md, rw, run_queue); | ||
823 | } | ||
824 | |||
756 | static void dm_unprep_request(struct request *rq) | 825 | static void dm_unprep_request(struct request *rq) |
757 | { | 826 | { |
758 | struct request *clone = rq->special; | 827 | struct request *clone = rq->special; |
@@ -768,12 +837,23 @@ static void dm_unprep_request(struct request *rq) | |||
768 | */ | 837 | */ |
769 | void dm_requeue_unmapped_request(struct request *clone) | 838 | void dm_requeue_unmapped_request(struct request *clone) |
770 | { | 839 | { |
840 | int rw = rq_data_dir(clone); | ||
771 | struct dm_rq_target_io *tio = clone->end_io_data; | 841 | struct dm_rq_target_io *tio = clone->end_io_data; |
772 | struct mapped_device *md = tio->md; | 842 | struct mapped_device *md = tio->md; |
773 | struct request *rq = tio->orig; | 843 | struct request *rq = tio->orig; |
774 | struct request_queue *q = rq->q; | 844 | struct request_queue *q = rq->q; |
775 | unsigned long flags; | 845 | unsigned long flags; |
776 | 846 | ||
847 | if (unlikely(blk_barrier_rq(clone))) { | ||
848 | /* | ||
849 | * Barrier clones share an original request. | ||
850 | * Leave it to dm_end_request(), which handles this special | ||
851 | * case. | ||
852 | */ | ||
853 | dm_end_request(clone, DM_ENDIO_REQUEUE); | ||
854 | return; | ||
855 | } | ||
856 | |||
777 | dm_unprep_request(rq); | 857 | dm_unprep_request(rq); |
778 | 858 | ||
779 | spin_lock_irqsave(q->queue_lock, flags); | 859 | spin_lock_irqsave(q->queue_lock, flags); |
@@ -782,7 +862,7 @@ void dm_requeue_unmapped_request(struct request *clone) | |||
782 | blk_requeue_request(q, rq); | 862 | blk_requeue_request(q, rq); |
783 | spin_unlock_irqrestore(q->queue_lock, flags); | 863 | spin_unlock_irqrestore(q->queue_lock, flags); |
784 | 864 | ||
785 | rq_completed(md, 0); | 865 | rq_completed(md, rw, 0); |
786 | } | 866 | } |
787 | EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); | 867 | EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); |
788 | 868 | ||
@@ -815,34 +895,28 @@ static void start_queue(struct request_queue *q) | |||
815 | spin_unlock_irqrestore(q->queue_lock, flags); | 895 | spin_unlock_irqrestore(q->queue_lock, flags); |
816 | } | 896 | } |
817 | 897 | ||
818 | /* | 898 | static void dm_done(struct request *clone, int error, bool mapped) |
819 | * Complete the clone and the original request. | ||
820 | * Must be called without queue lock. | ||
821 | */ | ||
822 | static void dm_end_request(struct request *clone, int error) | ||
823 | { | 899 | { |
900 | int r = error; | ||
824 | struct dm_rq_target_io *tio = clone->end_io_data; | 901 | struct dm_rq_target_io *tio = clone->end_io_data; |
825 | struct mapped_device *md = tio->md; | 902 | dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; |
826 | struct request *rq = tio->orig; | ||
827 | 903 | ||
828 | if (blk_pc_request(rq)) { | 904 | if (mapped && rq_end_io) |
829 | rq->errors = clone->errors; | 905 | r = rq_end_io(tio->ti, clone, error, &tio->info); |
830 | rq->resid_len = clone->resid_len; | ||
831 | 906 | ||
832 | if (rq->sense) | 907 | if (r <= 0) |
833 | /* | 908 | /* The target wants to complete the I/O */ |
834 | * We are using the sense buffer of the original | 909 | dm_end_request(clone, r); |
835 | * request. | 910 | else if (r == DM_ENDIO_INCOMPLETE) |
836 | * So setting the length of the sense data is enough. | 911 | /* The target will handle the I/O */ |
837 | */ | 912 | return; |
838 | rq->sense_len = clone->sense_len; | 913 | else if (r == DM_ENDIO_REQUEUE) |
914 | /* The target wants to requeue the I/O */ | ||
915 | dm_requeue_unmapped_request(clone); | ||
916 | else { | ||
917 | DMWARN("unimplemented target endio return value: %d", r); | ||
918 | BUG(); | ||
839 | } | 919 | } |
840 | |||
841 | free_rq_clone(clone); | ||
842 | |||
843 | blk_end_request_all(rq, error); | ||
844 | |||
845 | rq_completed(md, 1); | ||
846 | } | 920 | } |
847 | 921 | ||
848 | /* | 922 | /* |
@@ -850,27 +924,14 @@ static void dm_end_request(struct request *clone, int error) | |||
850 | */ | 924 | */ |
851 | static void dm_softirq_done(struct request *rq) | 925 | static void dm_softirq_done(struct request *rq) |
852 | { | 926 | { |
927 | bool mapped = true; | ||
853 | struct request *clone = rq->completion_data; | 928 | struct request *clone = rq->completion_data; |
854 | struct dm_rq_target_io *tio = clone->end_io_data; | 929 | struct dm_rq_target_io *tio = clone->end_io_data; |
855 | dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; | ||
856 | int error = tio->error; | ||
857 | 930 | ||
858 | if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io) | 931 | if (rq->cmd_flags & REQ_FAILED) |
859 | error = rq_end_io(tio->ti, clone, error, &tio->info); | 932 | mapped = false; |
860 | 933 | ||
861 | if (error <= 0) | 934 | dm_done(clone, tio->error, mapped); |
862 | /* The target wants to complete the I/O */ | ||
863 | dm_end_request(clone, error); | ||
864 | else if (error == DM_ENDIO_INCOMPLETE) | ||
865 | /* The target will handle the I/O */ | ||
866 | return; | ||
867 | else if (error == DM_ENDIO_REQUEUE) | ||
868 | /* The target wants to requeue the I/O */ | ||
869 | dm_requeue_unmapped_request(clone); | ||
870 | else { | ||
871 | DMWARN("unimplemented target endio return value: %d", error); | ||
872 | BUG(); | ||
873 | } | ||
874 | } | 935 | } |
875 | 936 | ||
876 | /* | 937 | /* |
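The old dm_end_request()/dm_softirq_done() pair is split so the target end_io dispatch lives in one place: dm_done() consults the target's rq_end_io hook only when the clone was actually mapped, then completes, leaves, or requeues the I/O according to the return value, while dm_softirq_done() shrinks to translating REQ_FAILED into mapped = false. dm_done() as added above, lightly re-commented:

static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;

	/* Only targets that actually saw the clone get to post-process it. */
	if (mapped && rq_end_io)
		r = rq_end_io(tio->ti, clone, error, &tio->info);

	if (r <= 0)
		/* The target wants to complete the I/O */
		dm_end_request(clone, r);
	else if (r == DM_ENDIO_INCOMPLETE)
		/* The target will handle the I/O */
		return;
	else if (r == DM_ENDIO_REQUEUE)
		/* The target wants to requeue the I/O */
		dm_requeue_unmapped_request(clone);
	else {
		DMWARN("unimplemented target endio return value: %d", r);
		BUG();
	}
}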
@@ -882,6 +943,19 @@ static void dm_complete_request(struct request *clone, int error) | |||
882 | struct dm_rq_target_io *tio = clone->end_io_data; | 943 | struct dm_rq_target_io *tio = clone->end_io_data; |
883 | struct request *rq = tio->orig; | 944 | struct request *rq = tio->orig; |
884 | 945 | ||
946 | if (unlikely(blk_barrier_rq(clone))) { | ||
947 | /* | ||
948 | * Barrier clones share an original request. So can't use | ||
949 | * softirq_done with the original. | ||
950 | * Pass the clone to dm_done() directly in this special case. | ||
951 | * It is safe (even if clone->q->queue_lock is held here) | ||
952 | * because there is no I/O dispatching during the completion | ||
953 | * of barrier clone. | ||
954 | */ | ||
955 | dm_done(clone, error, true); | ||
956 | return; | ||
957 | } | ||
958 | |||
885 | tio->error = error; | 959 | tio->error = error; |
886 | rq->completion_data = clone; | 960 | rq->completion_data = clone; |
887 | blk_complete_request(rq); | 961 | blk_complete_request(rq); |
@@ -898,6 +972,17 @@ void dm_kill_unmapped_request(struct request *clone, int error) | |||
898 | struct dm_rq_target_io *tio = clone->end_io_data; | 972 | struct dm_rq_target_io *tio = clone->end_io_data; |
899 | struct request *rq = tio->orig; | 973 | struct request *rq = tio->orig; |
900 | 974 | ||
975 | if (unlikely(blk_barrier_rq(clone))) { | ||
976 | /* | ||
977 | * Barrier clones share an original request. | ||
978 | * Leave it to dm_end_request(), which handles this special | ||
979 | * case. | ||
980 | */ | ||
981 | BUG_ON(error > 0); | ||
982 | dm_end_request(clone, error); | ||
983 | return; | ||
984 | } | ||
985 | |||
901 | rq->cmd_flags |= REQ_FAILED; | 986 | rq->cmd_flags |= REQ_FAILED; |
902 | dm_complete_request(clone, error); | 987 | dm_complete_request(clone, error); |
903 | } | 988 | } |
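All three completion-side entry points gain the same special case, because every barrier clone shares the single original flush request: dm_requeue_unmapped_request() ends the clone with DM_ENDIO_REQUEUE instead of requeueing the original, dm_complete_request() calls dm_done() directly instead of going through softirq completion, and dm_kill_unmapped_request() ends the clone with the error instead of failing the original. The last of these, as it reads after this hunk (the signature comes from the hunk header):

void dm_kill_unmapped_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct request *rq = tio->orig;

	if (unlikely(blk_barrier_rq(clone))) {
		/*
		 * Barrier clones share an original request.
		 * Leave it to dm_end_request(), which handles this
		 * special case.
		 */
		BUG_ON(error > 0);
		dm_end_request(clone, error);
		return;
	}

	/* Non-barrier clone: fail the original through softirq completion. */
	rq->cmd_flags |= REQ_FAILED;
	dm_complete_request(clone, error);
}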
@@ -1214,7 +1299,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
1214 | struct clone_info ci; | 1299 | struct clone_info ci; |
1215 | int error = 0; | 1300 | int error = 0; |
1216 | 1301 | ||
1217 | ci.map = dm_get_table(md); | 1302 | ci.map = dm_get_live_table(md); |
1218 | if (unlikely(!ci.map)) { | 1303 | if (unlikely(!ci.map)) { |
1219 | if (!bio_rw_flagged(bio, BIO_RW_BARRIER)) | 1304 | if (!bio_rw_flagged(bio, BIO_RW_BARRIER)) |
1220 | bio_io_error(bio); | 1305 | bio_io_error(bio); |
@@ -1255,7 +1340,7 @@ static int dm_merge_bvec(struct request_queue *q, | |||
1255 | struct bio_vec *biovec) | 1340 | struct bio_vec *biovec) |
1256 | { | 1341 | { |
1257 | struct mapped_device *md = q->queuedata; | 1342 | struct mapped_device *md = q->queuedata; |
1258 | struct dm_table *map = dm_get_table(md); | 1343 | struct dm_table *map = dm_get_live_table(md); |
1259 | struct dm_target *ti; | 1344 | struct dm_target *ti; |
1260 | sector_t max_sectors; | 1345 | sector_t max_sectors; |
1261 | int max_size = 0; | 1346 | int max_size = 0; |
@@ -1352,11 +1437,6 @@ static int dm_make_request(struct request_queue *q, struct bio *bio) | |||
1352 | { | 1437 | { |
1353 | struct mapped_device *md = q->queuedata; | 1438 | struct mapped_device *md = q->queuedata; |
1354 | 1439 | ||
1355 | if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { | ||
1356 | bio_endio(bio, -EOPNOTSUPP); | ||
1357 | return 0; | ||
1358 | } | ||
1359 | |||
1360 | return md->saved_make_request_fn(q, bio); /* call __make_request() */ | 1440 | return md->saved_make_request_fn(q, bio); /* call __make_request() */ |
1361 | } | 1441 | } |
1362 | 1442 | ||
@@ -1375,6 +1455,25 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
1375 | return _dm_request(q, bio); | 1455 | return _dm_request(q, bio); |
1376 | } | 1456 | } |
1377 | 1457 | ||
1458 | /* | ||
1459 | * Mark this request as flush request, so that dm_request_fn() can | ||
1460 | * recognize. | ||
1461 | */ | ||
1462 | static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq) | ||
1463 | { | ||
1464 | rq->cmd_type = REQ_TYPE_LINUX_BLOCK; | ||
1465 | rq->cmd[0] = REQ_LB_OP_FLUSH; | ||
1466 | } | ||
1467 | |||
1468 | static bool dm_rq_is_flush_request(struct request *rq) | ||
1469 | { | ||
1470 | if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK && | ||
1471 | rq->cmd[0] == REQ_LB_OP_FLUSH) | ||
1472 | return true; | ||
1473 | else | ||
1474 | return false; | ||
1475 | } | ||
1476 | |||
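These two helpers are the glue between the block layer's barrier machinery and request-based dm: alloc_dev() (later in this patch) registers dm_rq_prepare_flush() through blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH, dm_rq_prepare_flush), the block layer then invokes it for the empty pre/post flush requests it generates, and dm_prep_fn()/dm_request_fn() use dm_rq_is_flush_request() to divert those requests to the barrier work item instead of the normal clone-and-map path. The predicate is equivalent to a single expression; a minimal restatement:

static bool dm_rq_is_flush_request(struct request *rq)
{
	/* Set by dm_rq_prepare_flush() when the block layer issues a flush. */
	return rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
	       rq->cmd[0] == REQ_LB_OP_FLUSH;
}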
1378 | void dm_dispatch_request(struct request *rq) | 1477 | void dm_dispatch_request(struct request *rq) |
1379 | { | 1478 | { |
1380 | int r; | 1479 | int r; |
@@ -1420,25 +1519,54 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, | |||
1420 | static int setup_clone(struct request *clone, struct request *rq, | 1519 | static int setup_clone(struct request *clone, struct request *rq, |
1421 | struct dm_rq_target_io *tio) | 1520 | struct dm_rq_target_io *tio) |
1422 | { | 1521 | { |
1423 | int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | 1522 | int r; |
1424 | dm_rq_bio_constructor, tio); | ||
1425 | 1523 | ||
1426 | if (r) | 1524 | if (dm_rq_is_flush_request(rq)) { |
1427 | return r; | 1525 | blk_rq_init(NULL, clone); |
1526 | clone->cmd_type = REQ_TYPE_FS; | ||
1527 | clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); | ||
1528 | } else { | ||
1529 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | ||
1530 | dm_rq_bio_constructor, tio); | ||
1531 | if (r) | ||
1532 | return r; | ||
1533 | |||
1534 | clone->cmd = rq->cmd; | ||
1535 | clone->cmd_len = rq->cmd_len; | ||
1536 | clone->sense = rq->sense; | ||
1537 | clone->buffer = rq->buffer; | ||
1538 | } | ||
1428 | 1539 | ||
1429 | clone->cmd = rq->cmd; | ||
1430 | clone->cmd_len = rq->cmd_len; | ||
1431 | clone->sense = rq->sense; | ||
1432 | clone->buffer = rq->buffer; | ||
1433 | clone->end_io = end_clone_request; | 1540 | clone->end_io = end_clone_request; |
1434 | clone->end_io_data = tio; | 1541 | clone->end_io_data = tio; |
1435 | 1542 | ||
1436 | return 0; | 1543 | return 0; |
1437 | } | 1544 | } |
1438 | 1545 | ||
1439 | static int dm_rq_flush_suspending(struct mapped_device *md) | 1546 | static struct request *clone_rq(struct request *rq, struct mapped_device *md, |
1547 | gfp_t gfp_mask) | ||
1440 | { | 1548 | { |
1441 | return !md->suspend_rq.special; | 1549 | struct request *clone; |
1550 | struct dm_rq_target_io *tio; | ||
1551 | |||
1552 | tio = alloc_rq_tio(md, gfp_mask); | ||
1553 | if (!tio) | ||
1554 | return NULL; | ||
1555 | |||
1556 | tio->md = md; | ||
1557 | tio->ti = NULL; | ||
1558 | tio->orig = rq; | ||
1559 | tio->error = 0; | ||
1560 | memset(&tio->info, 0, sizeof(tio->info)); | ||
1561 | |||
1562 | clone = &tio->clone; | ||
1563 | if (setup_clone(clone, rq, tio)) { | ||
1564 | /* -ENOMEM */ | ||
1565 | free_rq_tio(tio); | ||
1566 | return NULL; | ||
1567 | } | ||
1568 | |||
1569 | return clone; | ||
1442 | } | 1570 | } |
1443 | 1571 | ||
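clone_rq() consolidates what dm_prep_fn() used to open-code: allocate a dm_rq_target_io with the requested gfp_mask, initialize it, and build the clone via setup_clone(). Parameterizing the allocation lets one helper serve two very different contexts in this patch: dm_prep_fn() runs under the queue lock and passes GFP_ATOMIC, deferring the request if allocation fails, while the barrier path runs from workqueue context and passes GFP_NOIO. The two call sites as they appear in this patch:

	/* dm_prep_fn(), called with q->queue_lock held: */
	clone = clone_rq(rq, md, GFP_ATOMIC);
	if (!clone)
		return BLKPREP_DEFER;

	/* dm_rq_barrier(), running from md->wq in process context: */
	clone = clone_rq(md->flush_request, md, GFP_NOIO);
	dm_rq_set_flush_nr(clone, j);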
1444 | /* | 1572 | /* |
@@ -1447,39 +1575,19 @@ static int dm_rq_flush_suspending(struct mapped_device *md) | |||
1447 | static int dm_prep_fn(struct request_queue *q, struct request *rq) | 1575 | static int dm_prep_fn(struct request_queue *q, struct request *rq) |
1448 | { | 1576 | { |
1449 | struct mapped_device *md = q->queuedata; | 1577 | struct mapped_device *md = q->queuedata; |
1450 | struct dm_rq_target_io *tio; | ||
1451 | struct request *clone; | 1578 | struct request *clone; |
1452 | 1579 | ||
1453 | if (unlikely(rq == &md->suspend_rq)) { | 1580 | if (unlikely(dm_rq_is_flush_request(rq))) |
1454 | if (dm_rq_flush_suspending(md)) | 1581 | return BLKPREP_OK; |
1455 | return BLKPREP_OK; | ||
1456 | else | ||
1457 | /* The flush suspend was interrupted */ | ||
1458 | return BLKPREP_KILL; | ||
1459 | } | ||
1460 | 1582 | ||
1461 | if (unlikely(rq->special)) { | 1583 | if (unlikely(rq->special)) { |
1462 | DMWARN("Already has something in rq->special."); | 1584 | DMWARN("Already has something in rq->special."); |
1463 | return BLKPREP_KILL; | 1585 | return BLKPREP_KILL; |
1464 | } | 1586 | } |
1465 | 1587 | ||
1466 | tio = alloc_rq_tio(md); /* Only one for each original request */ | 1588 | clone = clone_rq(rq, md, GFP_ATOMIC); |
1467 | if (!tio) | 1589 | if (!clone) |
1468 | /* -ENOMEM */ | ||
1469 | return BLKPREP_DEFER; | ||
1470 | |||
1471 | tio->md = md; | ||
1472 | tio->ti = NULL; | ||
1473 | tio->orig = rq; | ||
1474 | tio->error = 0; | ||
1475 | memset(&tio->info, 0, sizeof(tio->info)); | ||
1476 | |||
1477 | clone = &tio->clone; | ||
1478 | if (setup_clone(clone, rq, tio)) { | ||
1479 | /* -ENOMEM */ | ||
1480 | free_rq_tio(tio); | ||
1481 | return BLKPREP_DEFER; | 1590 | return BLKPREP_DEFER; |
1482 | } | ||
1483 | 1591 | ||
1484 | rq->special = clone; | 1592 | rq->special = clone; |
1485 | rq->cmd_flags |= REQ_DONTPREP; | 1593 | rq->cmd_flags |= REQ_DONTPREP; |
@@ -1487,11 +1595,10 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) | |||
1487 | return BLKPREP_OK; | 1595 | return BLKPREP_OK; |
1488 | } | 1596 | } |
1489 | 1597 | ||
1490 | static void map_request(struct dm_target *ti, struct request *rq, | 1598 | static void map_request(struct dm_target *ti, struct request *clone, |
1491 | struct mapped_device *md) | 1599 | struct mapped_device *md) |
1492 | { | 1600 | { |
1493 | int r; | 1601 | int r; |
1494 | struct request *clone = rq->special; | ||
1495 | struct dm_rq_target_io *tio = clone->end_io_data; | 1602 | struct dm_rq_target_io *tio = clone->end_io_data; |
1496 | 1603 | ||
1497 | /* | 1604 | /* |
@@ -1511,6 +1618,8 @@ static void map_request(struct dm_target *ti, struct request *rq, | |||
1511 | break; | 1618 | break; |
1512 | case DM_MAPIO_REMAPPED: | 1619 | case DM_MAPIO_REMAPPED: |
1513 | /* The target has remapped the I/O so dispatch it */ | 1620 | /* The target has remapped the I/O so dispatch it */ |
1621 | trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), | ||
1622 | blk_rq_pos(tio->orig)); | ||
1514 | dm_dispatch_request(clone); | 1623 | dm_dispatch_request(clone); |
1515 | break; | 1624 | break; |
1516 | case DM_MAPIO_REQUEUE: | 1625 | case DM_MAPIO_REQUEUE: |
@@ -1536,29 +1645,26 @@ static void map_request(struct dm_target *ti, struct request *rq, | |||
1536 | static void dm_request_fn(struct request_queue *q) | 1645 | static void dm_request_fn(struct request_queue *q) |
1537 | { | 1646 | { |
1538 | struct mapped_device *md = q->queuedata; | 1647 | struct mapped_device *md = q->queuedata; |
1539 | struct dm_table *map = dm_get_table(md); | 1648 | struct dm_table *map = dm_get_live_table(md); |
1540 | struct dm_target *ti; | 1649 | struct dm_target *ti; |
1541 | struct request *rq; | 1650 | struct request *rq, *clone; |
1542 | 1651 | ||
1543 | /* | 1652 | /* |
1544 | * For noflush suspend, check blk_queue_stopped() to immediately | 1653 | * For suspend, check blk_queue_stopped() and increment |
1545 | * quit I/O dispatching. | 1654 | * ->pending within a single queue_lock not to increment the |
1655 | * number of in-flight I/Os after the queue is stopped in | ||
1656 | * dm_suspend(). | ||
1546 | */ | 1657 | */ |
1547 | while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { | 1658 | while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { |
1548 | rq = blk_peek_request(q); | 1659 | rq = blk_peek_request(q); |
1549 | if (!rq) | 1660 | if (!rq) |
1550 | goto plug_and_out; | 1661 | goto plug_and_out; |
1551 | 1662 | ||
1552 | if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend maker */ | 1663 | if (unlikely(dm_rq_is_flush_request(rq))) { |
1553 | if (queue_in_flight(q)) | 1664 | BUG_ON(md->flush_request); |
1554 | /* Not quiet yet. Wait more */ | 1665 | md->flush_request = rq; |
1555 | goto plug_and_out; | ||
1556 | |||
1557 | /* This device should be quiet now */ | ||
1558 | __stop_queue(q); | ||
1559 | blk_start_request(rq); | 1666 | blk_start_request(rq); |
1560 | __blk_end_request_all(rq, 0); | 1667 | queue_work(md->wq, &md->barrier_work); |
1561 | wake_up(&md->wait); | ||
1562 | goto out; | 1668 | goto out; |
1563 | } | 1669 | } |
1564 | 1670 | ||
@@ -1567,8 +1673,11 @@ static void dm_request_fn(struct request_queue *q) | |||
1567 | goto plug_and_out; | 1673 | goto plug_and_out; |
1568 | 1674 | ||
1569 | blk_start_request(rq); | 1675 | blk_start_request(rq); |
1676 | clone = rq->special; | ||
1677 | atomic_inc(&md->pending[rq_data_dir(clone)]); | ||
1678 | |||
1570 | spin_unlock(q->queue_lock); | 1679 | spin_unlock(q->queue_lock); |
1571 | map_request(ti, rq, md); | 1680 | map_request(ti, clone, md); |
1572 | spin_lock_irq(q->queue_lock); | 1681 | spin_lock_irq(q->queue_lock); |
1573 | } | 1682 | } |
1574 | 1683 | ||
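Two things change in the dispatch loop above: a recognized flush request is parked in md->flush_request and handed to the barrier work item rather than being ended inline, and the per-direction in-flight counter is incremented while q->queue_lock is still held, so the count can no longer rise after dm_suspend() has stopped the queue. The ordering that matters, extracted from the loop:

	blk_start_request(rq);
	clone = rq->special;
	/* Account the I/O before dropping the queue lock ... */
	atomic_inc(&md->pending[rq_data_dir(clone)]);

	spin_unlock(q->queue_lock);
	/* ... so a stopped queue implies md->pending[] can only fall. */
	map_request(ti, clone, md);
	spin_lock_irq(q->queue_lock);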
@@ -1595,7 +1704,7 @@ static int dm_lld_busy(struct request_queue *q) | |||
1595 | { | 1704 | { |
1596 | int r; | 1705 | int r; |
1597 | struct mapped_device *md = q->queuedata; | 1706 | struct mapped_device *md = q->queuedata; |
1598 | struct dm_table *map = dm_get_table(md); | 1707 | struct dm_table *map = dm_get_live_table(md); |
1599 | 1708 | ||
1600 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) | 1709 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) |
1601 | r = 1; | 1710 | r = 1; |
@@ -1610,7 +1719,7 @@ static int dm_lld_busy(struct request_queue *q) | |||
1610 | static void dm_unplug_all(struct request_queue *q) | 1719 | static void dm_unplug_all(struct request_queue *q) |
1611 | { | 1720 | { |
1612 | struct mapped_device *md = q->queuedata; | 1721 | struct mapped_device *md = q->queuedata; |
1613 | struct dm_table *map = dm_get_table(md); | 1722 | struct dm_table *map = dm_get_live_table(md); |
1614 | 1723 | ||
1615 | if (map) { | 1724 | if (map) { |
1616 | if (dm_request_based(md)) | 1725 | if (dm_request_based(md)) |
@@ -1628,7 +1737,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
1628 | struct dm_table *map; | 1737 | struct dm_table *map; |
1629 | 1738 | ||
1630 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 1739 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
1631 | map = dm_get_table(md); | 1740 | map = dm_get_live_table(md); |
1632 | if (map) { | 1741 | if (map) { |
1633 | /* | 1742 | /* |
1634 | * Request-based dm cares about only own queue for | 1743 | * Request-based dm cares about only own queue for |
@@ -1725,6 +1834,7 @@ out: | |||
1725 | static const struct block_device_operations dm_blk_dops; | 1834 | static const struct block_device_operations dm_blk_dops; |
1726 | 1835 | ||
1727 | static void dm_wq_work(struct work_struct *work); | 1836 | static void dm_wq_work(struct work_struct *work); |
1837 | static void dm_rq_barrier_work(struct work_struct *work); | ||
1728 | 1838 | ||
1729 | /* | 1839 | /* |
1730 | * Allocate and initialise a blank device with a given minor. | 1840 | * Allocate and initialise a blank device with a given minor. |
@@ -1754,6 +1864,7 @@ static struct mapped_device *alloc_dev(int minor) | |||
1754 | init_rwsem(&md->io_lock); | 1864 | init_rwsem(&md->io_lock); |
1755 | mutex_init(&md->suspend_lock); | 1865 | mutex_init(&md->suspend_lock); |
1756 | spin_lock_init(&md->deferred_lock); | 1866 | spin_lock_init(&md->deferred_lock); |
1867 | spin_lock_init(&md->barrier_error_lock); | ||
1757 | rwlock_init(&md->map_lock); | 1868 | rwlock_init(&md->map_lock); |
1758 | atomic_set(&md->holders, 1); | 1869 | atomic_set(&md->holders, 1); |
1759 | atomic_set(&md->open_count, 0); | 1870 | atomic_set(&md->open_count, 0); |
@@ -1788,6 +1899,8 @@ static struct mapped_device *alloc_dev(int minor) | |||
1788 | blk_queue_softirq_done(md->queue, dm_softirq_done); | 1899 | blk_queue_softirq_done(md->queue, dm_softirq_done); |
1789 | blk_queue_prep_rq(md->queue, dm_prep_fn); | 1900 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
1790 | blk_queue_lld_busy(md->queue, dm_lld_busy); | 1901 | blk_queue_lld_busy(md->queue, dm_lld_busy); |
1902 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH, | ||
1903 | dm_rq_prepare_flush); | ||
1791 | 1904 | ||
1792 | md->disk = alloc_disk(1); | 1905 | md->disk = alloc_disk(1); |
1793 | if (!md->disk) | 1906 | if (!md->disk) |
@@ -1797,6 +1910,7 @@ static struct mapped_device *alloc_dev(int minor) | |||
1797 | atomic_set(&md->pending[1], 0); | 1910 | atomic_set(&md->pending[1], 0); |
1798 | init_waitqueue_head(&md->wait); | 1911 | init_waitqueue_head(&md->wait); |
1799 | INIT_WORK(&md->work, dm_wq_work); | 1912 | INIT_WORK(&md->work, dm_wq_work); |
1913 | INIT_WORK(&md->barrier_work, dm_rq_barrier_work); | ||
1800 | init_waitqueue_head(&md->eventq); | 1914 | init_waitqueue_head(&md->eventq); |
1801 | 1915 | ||
1802 | md->disk->major = _major; | 1916 | md->disk->major = _major; |
@@ -1921,9 +2035,13 @@ static void __set_size(struct mapped_device *md, sector_t size) | |||
1921 | mutex_unlock(&md->bdev->bd_inode->i_mutex); | 2035 | mutex_unlock(&md->bdev->bd_inode->i_mutex); |
1922 | } | 2036 | } |
1923 | 2037 | ||
1924 | static int __bind(struct mapped_device *md, struct dm_table *t, | 2038 | /* |
1925 | struct queue_limits *limits) | 2039 | * Returns old map, which caller must destroy. |
2040 | */ | ||
2041 | static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | ||
2042 | struct queue_limits *limits) | ||
1926 | { | 2043 | { |
2044 | struct dm_table *old_map; | ||
1927 | struct request_queue *q = md->queue; | 2045 | struct request_queue *q = md->queue; |
1928 | sector_t size; | 2046 | sector_t size; |
1929 | unsigned long flags; | 2047 | unsigned long flags; |
@@ -1938,11 +2056,6 @@ static int __bind(struct mapped_device *md, struct dm_table *t, | |||
1938 | 2056 | ||
1939 | __set_size(md, size); | 2057 | __set_size(md, size); |
1940 | 2058 | ||
1941 | if (!size) { | ||
1942 | dm_table_destroy(t); | ||
1943 | return 0; | ||
1944 | } | ||
1945 | |||
1946 | dm_table_event_callback(t, event_callback, md); | 2059 | dm_table_event_callback(t, event_callback, md); |
1947 | 2060 | ||
1948 | /* | 2061 | /* |
@@ -1958,26 +2071,31 @@ static int __bind(struct mapped_device *md, struct dm_table *t, | |||
1958 | __bind_mempools(md, t); | 2071 | __bind_mempools(md, t); |
1959 | 2072 | ||
1960 | write_lock_irqsave(&md->map_lock, flags); | 2073 | write_lock_irqsave(&md->map_lock, flags); |
2074 | old_map = md->map; | ||
1961 | md->map = t; | 2075 | md->map = t; |
1962 | dm_table_set_restrictions(t, q, limits); | 2076 | dm_table_set_restrictions(t, q, limits); |
1963 | write_unlock_irqrestore(&md->map_lock, flags); | 2077 | write_unlock_irqrestore(&md->map_lock, flags); |
1964 | 2078 | ||
1965 | return 0; | 2079 | return old_map; |
1966 | } | 2080 | } |
1967 | 2081 | ||
1968 | static void __unbind(struct mapped_device *md) | 2082 | /* |
2083 | * Returns unbound table for the caller to free. | ||
2084 | */ | ||
2085 | static struct dm_table *__unbind(struct mapped_device *md) | ||
1969 | { | 2086 | { |
1970 | struct dm_table *map = md->map; | 2087 | struct dm_table *map = md->map; |
1971 | unsigned long flags; | 2088 | unsigned long flags; |
1972 | 2089 | ||
1973 | if (!map) | 2090 | if (!map) |
1974 | return; | 2091 | return NULL; |
1975 | 2092 | ||
1976 | dm_table_event_callback(map, NULL, NULL); | 2093 | dm_table_event_callback(map, NULL, NULL); |
1977 | write_lock_irqsave(&md->map_lock, flags); | 2094 | write_lock_irqsave(&md->map_lock, flags); |
1978 | md->map = NULL; | 2095 | md->map = NULL; |
1979 | write_unlock_irqrestore(&md->map_lock, flags); | 2096 | write_unlock_irqrestore(&md->map_lock, flags); |
1980 | dm_table_destroy(map); | 2097 | |
2098 | return map; | ||
1981 | } | 2099 | } |
1982 | 2100 | ||
1983 | /* | 2101 | /* |
@@ -2059,18 +2177,18 @@ void dm_put(struct mapped_device *md) | |||
2059 | BUG_ON(test_bit(DMF_FREEING, &md->flags)); | 2177 | BUG_ON(test_bit(DMF_FREEING, &md->flags)); |
2060 | 2178 | ||
2061 | if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { | 2179 | if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { |
2062 | map = dm_get_table(md); | 2180 | map = dm_get_live_table(md); |
2063 | idr_replace(&_minor_idr, MINOR_ALLOCED, | 2181 | idr_replace(&_minor_idr, MINOR_ALLOCED, |
2064 | MINOR(disk_devt(dm_disk(md)))); | 2182 | MINOR(disk_devt(dm_disk(md)))); |
2065 | set_bit(DMF_FREEING, &md->flags); | 2183 | set_bit(DMF_FREEING, &md->flags); |
2066 | spin_unlock(&_minor_lock); | 2184 | spin_unlock(&_minor_lock); |
2067 | if (!dm_suspended(md)) { | 2185 | if (!dm_suspended_md(md)) { |
2068 | dm_table_presuspend_targets(map); | 2186 | dm_table_presuspend_targets(map); |
2069 | dm_table_postsuspend_targets(map); | 2187 | dm_table_postsuspend_targets(map); |
2070 | } | 2188 | } |
2071 | dm_sysfs_exit(md); | 2189 | dm_sysfs_exit(md); |
2072 | dm_table_put(map); | 2190 | dm_table_put(map); |
2073 | __unbind(md); | 2191 | dm_table_destroy(__unbind(md)); |
2074 | free_dev(md); | 2192 | free_dev(md); |
2075 | } | 2193 | } |
2076 | } | 2194 | } |
@@ -2080,8 +2198,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
2080 | { | 2198 | { |
2081 | int r = 0; | 2199 | int r = 0; |
2082 | DECLARE_WAITQUEUE(wait, current); | 2200 | DECLARE_WAITQUEUE(wait, current); |
2083 | struct request_queue *q = md->queue; | ||
2084 | unsigned long flags; | ||
2085 | 2201 | ||
2086 | dm_unplug_all(md->queue); | 2202 | dm_unplug_all(md->queue); |
2087 | 2203 | ||
@@ -2091,15 +2207,7 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
2091 | set_current_state(interruptible); | 2207 | set_current_state(interruptible); |
2092 | 2208 | ||
2093 | smp_mb(); | 2209 | smp_mb(); |
2094 | if (dm_request_based(md)) { | 2210 | if (!md_in_flight(md)) |
2095 | spin_lock_irqsave(q->queue_lock, flags); | ||
2096 | if (!queue_in_flight(q) && blk_queue_stopped(q)) { | ||
2097 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2098 | break; | ||
2099 | } | ||
2100 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2101 | } else if (!atomic_read(&md->pending[0]) && | ||
2102 | !atomic_read(&md->pending[1])) | ||
2103 | break; | 2211 | break; |
2104 | 2212 | ||
2105 | if (interruptible == TASK_INTERRUPTIBLE && | 2213 | if (interruptible == TASK_INTERRUPTIBLE && |
@@ -2194,98 +2302,106 @@ static void dm_queue_flush(struct mapped_device *md) | |||
2194 | queue_work(md->wq, &md->work); | 2302 | queue_work(md->wq, &md->work); |
2195 | } | 2303 | } |
2196 | 2304 | ||
2197 | /* | 2305 | static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr) |
2198 | * Swap in a new table (destroying old one). | ||
2199 | */ | ||
2200 | int dm_swap_table(struct mapped_device *md, struct dm_table *table) | ||
2201 | { | 2306 | { |
2202 | struct queue_limits limits; | 2307 | struct dm_rq_target_io *tio = clone->end_io_data; |
2203 | int r = -EINVAL; | ||
2204 | 2308 | ||
2205 | mutex_lock(&md->suspend_lock); | 2309 | tio->info.flush_request = flush_nr; |
2310 | } | ||
2206 | 2311 | ||
2207 | /* device must be suspended */ | 2312 | /* Issue barrier requests to targets and wait for their completion. */ |
2208 | if (!dm_suspended(md)) | 2313 | static int dm_rq_barrier(struct mapped_device *md) |
2209 | goto out; | 2314 | { |
2315 | int i, j; | ||
2316 | struct dm_table *map = dm_get_live_table(md); | ||
2317 | unsigned num_targets = dm_table_get_num_targets(map); | ||
2318 | struct dm_target *ti; | ||
2319 | struct request *clone; | ||
2210 | 2320 | ||
2211 | r = dm_calculate_queue_limits(table, &limits); | 2321 | md->barrier_error = 0; |
2212 | if (r) | ||
2213 | goto out; | ||
2214 | 2322 | ||
2215 | /* cannot change the device type, once a table is bound */ | 2323 | for (i = 0; i < num_targets; i++) { |
2216 | if (md->map && | 2324 | ti = dm_table_get_target(map, i); |
2217 | (dm_table_get_type(md->map) != dm_table_get_type(table))) { | 2325 | for (j = 0; j < ti->num_flush_requests; j++) { |
2218 | DMWARN("can't change the device type after a table is bound"); | 2326 | clone = clone_rq(md->flush_request, md, GFP_NOIO); |
2219 | goto out; | 2327 | dm_rq_set_flush_nr(clone, j); |
2328 | atomic_inc(&md->pending[rq_data_dir(clone)]); | ||
2329 | map_request(ti, clone, md); | ||
2330 | } | ||
2220 | } | 2331 | } |
2221 | 2332 | ||
2222 | __unbind(md); | 2333 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); |
2223 | r = __bind(md, table, &limits); | 2334 | dm_table_put(map); |
2224 | |||
2225 | out: | ||
2226 | mutex_unlock(&md->suspend_lock); | ||
2227 | return r; | ||
2228 | } | ||
2229 | 2335 | ||
2230 | static void dm_rq_invalidate_suspend_marker(struct mapped_device *md) | 2336 | return md->barrier_error; |
2231 | { | ||
2232 | md->suspend_rq.special = (void *)0x1; | ||
2233 | } | 2337 | } |
2234 | 2338 | ||
2235 | static void dm_rq_abort_suspend(struct mapped_device *md, int noflush) | 2339 | static void dm_rq_barrier_work(struct work_struct *work) |
2236 | { | 2340 | { |
2341 | int error; | ||
2342 | struct mapped_device *md = container_of(work, struct mapped_device, | ||
2343 | barrier_work); | ||
2237 | struct request_queue *q = md->queue; | 2344 | struct request_queue *q = md->queue; |
2345 | struct request *rq; | ||
2238 | unsigned long flags; | 2346 | unsigned long flags; |
2239 | 2347 | ||
2240 | spin_lock_irqsave(q->queue_lock, flags); | 2348 | /* |
2241 | if (!noflush) | 2349 | * Hold the md reference here and leave it at the last part so that |
2242 | dm_rq_invalidate_suspend_marker(md); | 2350 | * the md can't be deleted by device opener when the barrier request |
2243 | __start_queue(q); | 2351 | * completes. |
2244 | spin_unlock_irqrestore(q->queue_lock, flags); | 2352 | */ |
2245 | } | 2353 | dm_get(md); |
2246 | 2354 | ||
2247 | static void dm_rq_start_suspend(struct mapped_device *md, int noflush) | 2355 | error = dm_rq_barrier(md); |
2248 | { | ||
2249 | struct request *rq = &md->suspend_rq; | ||
2250 | struct request_queue *q = md->queue; | ||
2251 | 2356 | ||
2252 | if (noflush) | 2357 | rq = md->flush_request; |
2253 | stop_queue(q); | 2358 | md->flush_request = NULL; |
2254 | else { | 2359 | |
2255 | blk_rq_init(q, rq); | 2360 | if (error == DM_ENDIO_REQUEUE) { |
2256 | blk_insert_request(q, rq, 0, NULL); | 2361 | spin_lock_irqsave(q->queue_lock, flags); |
2257 | } | 2362 | blk_requeue_request(q, rq); |
2363 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2364 | } else | ||
2365 | blk_end_request_all(rq, error); | ||
2366 | |||
2367 | blk_run_queue(q); | ||
2368 | |||
2369 | dm_put(md); | ||
2258 | } | 2370 | } |
2259 | 2371 | ||
2260 | static int dm_rq_suspend_available(struct mapped_device *md, int noflush) | 2372 | /* |
2373 | * Swap in a new table, returning the old one for the caller to destroy. | ||
2374 | */ | ||
2375 | struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) | ||
2261 | { | 2376 | { |
2262 | int r = 1; | 2377 | struct dm_table *map = ERR_PTR(-EINVAL); |
2263 | struct request *rq = &md->suspend_rq; | 2378 | struct queue_limits limits; |
2264 | struct request_queue *q = md->queue; | 2379 | int r; |
2265 | unsigned long flags; | ||
2266 | 2380 | ||
2267 | if (noflush) | 2381 | mutex_lock(&md->suspend_lock); |
2268 | return r; | ||
2269 | 2382 | ||
2270 | /* The marker must be protected by queue lock if it is in use */ | 2383 | /* device must be suspended */ |
2271 | spin_lock_irqsave(q->queue_lock, flags); | 2384 | if (!dm_suspended_md(md)) |
2272 | if (unlikely(rq->ref_count)) { | 2385 | goto out; |
2273 | /* | 2386 | |
2274 | * This can happen, when the previous flush suspend was | 2387 | r = dm_calculate_queue_limits(table, &limits); |
2275 | * interrupted, the marker is still in the queue and | 2388 | if (r) { |
2276 | * this flush suspend has been invoked, because we don't | 2389 | map = ERR_PTR(r); |
2277 | * remove the marker at the time of suspend interruption. | 2390 | goto out; |
2278 | * We have only one marker per mapped_device, so we can't | ||
2279 | * start another flush suspend while it is in use. | ||
2280 | */ | ||
2281 | BUG_ON(!rq->special); /* The marker should be invalidated */ | ||
2282 | DMWARN("Invalidating the previous flush suspend is still in" | ||
2283 | " progress. Please retry later."); | ||
2284 | r = 0; | ||
2285 | } | 2391 | } |
2286 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2287 | 2392 | ||
2288 | return r; | 2393 | /* cannot change the device type, once a table is bound */ |
2394 | if (md->map && | ||
2395 | (dm_table_get_type(md->map) != dm_table_get_type(table))) { | ||
2396 | DMWARN("can't change the device type after a table is bound"); | ||
2397 | goto out; | ||
2398 | } | ||
2399 | |||
2400 | map = __bind(md, table, &limits); | ||
2401 | |||
2402 | out: | ||
2403 | mutex_unlock(&md->suspend_lock); | ||
2404 | return map; | ||
2289 | } | 2405 | } |
2290 | 2406 | ||
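The barrier itself is handled from workqueue context. dm_rq_barrier() fans the parked flush request out to every target, once per ti->num_flush_requests (numbering each clone through tio->info.flush_request), accounts the clones in md->pending[], waits for them with dm_wait_for_completion(), and returns the merged md->barrier_error. dm_rq_barrier_work() then finishes or requeues the original flush request, restarts the queue, and holds an md reference across the whole sequence so a concurrent delete cannot free the device. The fan-out at the core of dm_rq_barrier(), as added above:

	for (i = 0; i < num_targets; i++) {
		ti = dm_table_get_target(map, i);
		for (j = 0; j < ti->num_flush_requests; j++) {
			/* GFP_NOIO: the mempool allocation waits rather than fails. */
			clone = clone_rq(md->flush_request, md, GFP_NOIO);
			/* Tell the target which of its flushes this is. */
			dm_rq_set_flush_nr(clone, j);
			atomic_inc(&md->pending[rq_data_dir(clone)]);
			map_request(ti, clone, md);
		}
	}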
2291 | /* | 2407 | /* |
@@ -2330,49 +2446,11 @@ static void unlock_fs(struct mapped_device *md) | |||
2330 | /* | 2446 | /* |
2331 | * Suspend mechanism in request-based dm. | 2447 | * Suspend mechanism in request-based dm. |
2332 | * | 2448 | * |
2333 | * After the suspend starts, further incoming requests are kept in | 2449 | * 1. Flush all I/Os by lock_fs() if needed. |
2334 | * the request_queue and deferred. | 2450 | * 2. Stop dispatching any I/O by stopping the request_queue. |
2335 | * Remaining requests in the request_queue at the start of suspend are flushed | 2451 | * 3. Wait for all in-flight I/Os to be completed or requeued. |
2336 | * if it is flush suspend. | ||
2337 | * The suspend completes when the following conditions have been satisfied, | ||
2338 | * so wait for it: | ||
2339 | * 1. q->in_flight is 0 (which means no in_flight request) | ||
2340 | * 2. queue has been stopped (which means no request dispatching) | ||
2341 | * | ||
2342 | * | 2452 | * |
2343 | * Noflush suspend | 2453 | * To abort suspend, start the request_queue. |
2344 | * --------------- | ||
2345 | * Noflush suspend doesn't need to dispatch remaining requests. | ||
2346 | * So stop the queue immediately. Then, wait for all in_flight requests | ||
2347 | * to be completed or requeued. | ||
2348 | * | ||
2349 | * To abort noflush suspend, start the queue. | ||
2350 | * | ||
2351 | * | ||
2352 | * Flush suspend | ||
2353 | * ------------- | ||
2354 | * Flush suspend needs to dispatch remaining requests. So stop the queue | ||
2355 | * after the remaining requests are completed. (Requeued request must be also | ||
2356 | * re-dispatched and completed. Until then, we can't stop the queue.) | ||
2357 | * | ||
2358 | * During flushing the remaining requests, further incoming requests are also | ||
2359 | * inserted to the same queue. To distinguish which requests are to be | ||
2360 | * flushed, we insert a marker request to the queue at the time of starting | ||
2361 | * flush suspend, like a barrier. | ||
2362 | * The dispatching is blocked when the marker is found on the top of the queue. | ||
2363 | * And the queue is stopped when all in_flight requests are completed, since | ||
2364 | * that means the remaining requests are completely flushed. | ||
2365 | * Then, the marker is removed from the queue. | ||
2366 | * | ||
2367 | * To abort flush suspend, we also need to take care of the marker, not only | ||
2368 | * starting the queue. | ||
2369 | * We don't remove the marker forcibly from the queue since it's against | ||
2370 | * the block-layer manner. Instead, we put a invalidated mark on the marker. | ||
2371 | * When the invalidated marker is found on the top of the queue, it is | ||
2372 | * immediately removed from the queue, so it doesn't block dispatching. | ||
2373 | * Because we have only one marker per mapped_device, we can't start another | ||
2374 | * flush suspend until the invalidated marker is removed from the queue. | ||
2375 | * So fail and return with -EBUSY in such a case. | ||
2376 | */ | 2454 | */ |
2377 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | 2455 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) |
2378 | { | 2456 | { |
@@ -2383,17 +2461,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2383 | 2461 | ||
2384 | mutex_lock(&md->suspend_lock); | 2462 | mutex_lock(&md->suspend_lock); |
2385 | 2463 | ||
2386 | if (dm_suspended(md)) { | 2464 | if (dm_suspended_md(md)) { |
2387 | r = -EINVAL; | 2465 | r = -EINVAL; |
2388 | goto out_unlock; | 2466 | goto out_unlock; |
2389 | } | 2467 | } |
2390 | 2468 | ||
2391 | if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) { | 2469 | map = dm_get_live_table(md); |
2392 | r = -EBUSY; | ||
2393 | goto out_unlock; | ||
2394 | } | ||
2395 | |||
2396 | map = dm_get_table(md); | ||
2397 | 2470 | ||
2398 | /* | 2471 | /* |
2399 | * DMF_NOFLUSH_SUSPENDING must be set before presuspend. | 2472 | * DMF_NOFLUSH_SUSPENDING must be set before presuspend. |
@@ -2406,8 +2479,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2406 | dm_table_presuspend_targets(map); | 2479 | dm_table_presuspend_targets(map); |
2407 | 2480 | ||
2408 | /* | 2481 | /* |
2409 | * Flush I/O to the device. noflush supersedes do_lockfs, | 2482 | * Flush I/O to the device. |
2410 | * because lock_fs() needs to flush I/Os. | 2483 | * Any I/O submitted after lock_fs() may not be flushed. |
2484 | * noflush takes precedence over do_lockfs. | ||
2485 | * (lock_fs() flushes I/Os and waits for them to complete.) | ||
2411 | */ | 2486 | */ |
2412 | if (!noflush && do_lockfs) { | 2487 | if (!noflush && do_lockfs) { |
2413 | r = lock_fs(md); | 2488 | r = lock_fs(md); |
@@ -2436,10 +2511,15 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2436 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | 2511 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); |
2437 | up_write(&md->io_lock); | 2512 | up_write(&md->io_lock); |
2438 | 2513 | ||
2439 | flush_workqueue(md->wq); | 2514 | /* |
2440 | 2515 | * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which | |
2516 | * can be kicked until md->queue is stopped. So stop md->queue before | ||
2517 | * flushing md->wq. | ||
2518 | */ | ||
2441 | if (dm_request_based(md)) | 2519 | if (dm_request_based(md)) |
2442 | dm_rq_start_suspend(md, noflush); | 2520 | stop_queue(md->queue); |
2521 | |||
2522 | flush_workqueue(md->wq); | ||
2443 | 2523 | ||
2444 | /* | 2524 | /* |
2445 | * At this point no more requests are entering target request routines. | 2525 | * At this point no more requests are entering target request routines. |
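With the marker-request scheme gone, request-based suspend reduces to the three steps in the rewritten comment block above: flush I/O through lock_fs() if requested, stop the request_queue, and wait for in-flight I/O to complete or be requeued; aborting a suspend is now simply restarting the queue. The queue has to be stopped before flush_workqueue(md->wq) because barrier work can still be queued from dm_request_fn() until the queue is stopped. dm_wait_for_completion() matches this by dropping its queue_in_flight()/blk_queue_stopped() checks and watching the shared counters for bio-based and request-based devices alike; the check inside its existing wait loop is now just:

	set_current_state(interruptible);

	smp_mb();
	if (!md_in_flight(md))	/* covers bio-based and request-based dm */
		break;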
@@ -2458,7 +2538,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2458 | dm_queue_flush(md); | 2538 | dm_queue_flush(md); |
2459 | 2539 | ||
2460 | if (dm_request_based(md)) | 2540 | if (dm_request_based(md)) |
2461 | dm_rq_abort_suspend(md, noflush); | 2541 | start_queue(md->queue); |
2462 | 2542 | ||
2463 | unlock_fs(md); | 2543 | unlock_fs(md); |
2464 | goto out; /* pushback list is already flushed, so skip flush */ | 2544 | goto out; /* pushback list is already flushed, so skip flush */ |
@@ -2470,10 +2550,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2470 | * requests are being added to md->deferred list. | 2550 | * requests are being added to md->deferred list. |
2471 | */ | 2551 | */ |
2472 | 2552 | ||
2473 | dm_table_postsuspend_targets(map); | ||
2474 | |||
2475 | set_bit(DMF_SUSPENDED, &md->flags); | 2553 | set_bit(DMF_SUSPENDED, &md->flags); |
2476 | 2554 | ||
2555 | dm_table_postsuspend_targets(map); | ||
2556 | |||
2477 | out: | 2557 | out: |
2478 | dm_table_put(map); | 2558 | dm_table_put(map); |
2479 | 2559 | ||
@@ -2488,10 +2568,10 @@ int dm_resume(struct mapped_device *md) | |||
2488 | struct dm_table *map = NULL; | 2568 | struct dm_table *map = NULL; |
2489 | 2569 | ||
2490 | mutex_lock(&md->suspend_lock); | 2570 | mutex_lock(&md->suspend_lock); |
2491 | if (!dm_suspended(md)) | 2571 | if (!dm_suspended_md(md)) |
2492 | goto out; | 2572 | goto out; |
2493 | 2573 | ||
2494 | map = dm_get_table(md); | 2574 | map = dm_get_live_table(md); |
2495 | if (!map || !dm_table_get_size(map)) | 2575 | if (!map || !dm_table_get_size(map)) |
2496 | goto out; | 2576 | goto out; |
2497 | 2577 | ||
@@ -2592,18 +2672,29 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | |||
2592 | return NULL; | 2672 | return NULL; |
2593 | 2673 | ||
2594 | if (test_bit(DMF_FREEING, &md->flags) || | 2674 | if (test_bit(DMF_FREEING, &md->flags) || |
2595 | test_bit(DMF_DELETING, &md->flags)) | 2675 | dm_deleting_md(md)) |
2596 | return NULL; | 2676 | return NULL; |
2597 | 2677 | ||
2598 | dm_get(md); | 2678 | dm_get(md); |
2599 | return md; | 2679 | return md; |
2600 | } | 2680 | } |
2601 | 2681 | ||
2602 | int dm_suspended(struct mapped_device *md) | 2682 | int dm_suspended_md(struct mapped_device *md) |
2603 | { | 2683 | { |
2604 | return test_bit(DMF_SUSPENDED, &md->flags); | 2684 | return test_bit(DMF_SUSPENDED, &md->flags); |
2605 | } | 2685 | } |
2606 | 2686 | ||
2687 | int dm_suspended(struct dm_target *ti) | ||
2688 | { | ||
2689 | struct mapped_device *md = dm_table_get_md(ti->table); | ||
2690 | int r = dm_suspended_md(md); | ||
2691 | |||
2692 | dm_put(md); | ||
2693 | |||
2694 | return r; | ||
2695 | } | ||
2696 | EXPORT_SYMBOL_GPL(dm_suspended); | ||
2697 | |||
2607 | int dm_noflush_suspending(struct dm_target *ti) | 2698 | int dm_noflush_suspending(struct dm_target *ti) |
2608 | { | 2699 | { |
2609 | struct mapped_device *md = dm_table_get_md(ti->table); | 2700 | struct mapped_device *md = dm_table_get_md(ti->table); |