Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--	drivers/md/dm.c	| 643
1 file changed, 367 insertions(+), 276 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 724efc63904d..3167480b532c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -143,9 +143,19 @@ struct mapped_device {
 	int barrier_error;
 
 	/*
+	 * Protect barrier_error from concurrent endio processing
+	 * in request-based dm.
+	 */
+	spinlock_t barrier_error_lock;
+
+	/*
 	 * Processing queue (flush/barriers)
 	 */
 	struct workqueue_struct *wq;
+	struct work_struct barrier_work;
+
+	/* A pointer to the currently processing pre/post flush request */
+	struct request *flush_request;
 
 	/*
 	 * The current mapping.
@@ -178,9 +188,6 @@ struct mapped_device {
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
-	/* marker of flush suspend for request-based dm */
-	struct request suspend_rq;
-
 	/* For saving the address of __make_request for request based dm */
 	make_request_fn *saved_make_request_fn;
 
@@ -275,6 +282,7 @@ static int (*_inits[])(void) __initdata = {
 	dm_target_init,
 	dm_linear_init,
 	dm_stripe_init,
+	dm_io_init,
 	dm_kcopyd_init,
 	dm_interface_init,
 };
@@ -284,6 +292,7 @@ static void (*_exits[])(void) = {
 	dm_target_exit,
 	dm_linear_exit,
 	dm_stripe_exit,
+	dm_io_exit,
 	dm_kcopyd_exit,
 	dm_interface_exit,
 };
@@ -320,6 +329,11 @@ static void __exit dm_exit(void)
 /*
  * Block device functions
  */
+int dm_deleting_md(struct mapped_device *md)
+{
+	return test_bit(DMF_DELETING, &md->flags);
+}
+
 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 {
 	struct mapped_device *md;
@@ -331,7 +345,7 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 		goto out;
 
 	if (test_bit(DMF_FREEING, &md->flags) ||
-	    test_bit(DMF_DELETING, &md->flags)) {
+	    dm_deleting_md(md)) {
 		md = NULL;
 		goto out;
 	}
@@ -388,7 +402,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *tgt;
 	int r = -ENOTTY;
 
@@ -401,7 +415,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 
 	tgt = dm_table_get_target(map, 0);
 
-	if (dm_suspended(md)) {
+	if (dm_suspended_md(md)) {
 		r = -EAGAIN;
 		goto out;
 	}
@@ -430,9 +444,10 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 	mempool_free(tio, md->tio_pool);
 }
 
-static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
+static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
+					    gfp_t gfp_mask)
 {
-	return mempool_alloc(md->tio_pool, GFP_ATOMIC);
+	return mempool_alloc(md->tio_pool, gfp_mask);
 }
 
 static void free_rq_tio(struct dm_rq_target_io *tio)
@@ -450,6 +465,12 @@ static void free_bio_info(struct dm_rq_clone_bio_info *info)
 	mempool_free(info, info->tio->md->io_pool);
 }
 
+static int md_in_flight(struct mapped_device *md)
+{
+	return atomic_read(&md->pending[READ]) +
+	       atomic_read(&md->pending[WRITE]);
+}
+
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
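Later hunks pair this helper with per-direction accounting: dm_request_fn() and dm_rq_barrier() increment md->pending[rw] before dispatching a clone, rq_completed() decrements it, and waiters use md_in_flight() to test both counters. A standalone userspace model of that invariant, with C11 atomics standing in for the kernel's atomic_t and the names reused only for readability:

#include <assert.h>
#include <stdatomic.h>

enum { READ = 0, WRITE = 1 };		/* stand-ins for the kernel macros */

static atomic_int pending[2];		/* models md->pending[2] */

static int md_in_flight(void)
{
	return atomic_load(&pending[READ]) + atomic_load(&pending[WRITE]);
}

static void dispatch_clone(int rw)	/* dm_request_fn()/dm_rq_barrier() side */
{
	atomic_fetch_add(&pending[rw], 1);
}

static void complete_clone(int rw)	/* rq_completed() side */
{
	atomic_fetch_sub(&pending[rw], 1);
}

int main(void)
{
	dispatch_clone(READ);
	dispatch_clone(WRITE);
	assert(md_in_flight() == 2);
	complete_clone(WRITE);
	complete_clone(READ);
	assert(md_in_flight() == 0);	/* the point at which dm wakes md->wait */
	return 0;
}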
@@ -512,7 +533,7 @@ static void queue_io(struct mapped_device *md, struct bio *bio)
  * function to access the md->map field, and make sure they call
  * dm_table_put() when finished.
  */
-struct dm_table *dm_get_table(struct mapped_device *md)
+struct dm_table *dm_get_live_table(struct mapped_device *md)
 {
 	struct dm_table *t;
 	unsigned long flags;
@@ -716,28 +737,38 @@ static void end_clone_bio(struct bio *clone, int error)
 	blk_update_request(tio->orig, 0, nr_bytes);
 }
 
+static void store_barrier_error(struct mapped_device *md, int error)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&md->barrier_error_lock, flags);
+	/*
+	 * Basically, the first error is taken, but:
+	 *   -EOPNOTSUPP supersedes any I/O error.
+	 *   Requeue request supersedes any I/O error but -EOPNOTSUPP.
+	 */
+	if (!md->barrier_error || error == -EOPNOTSUPP ||
+	    (md->barrier_error != -EOPNOTSUPP &&
+	     error == DM_ENDIO_REQUEUE))
+		md->barrier_error = error;
+	spin_unlock_irqrestore(&md->barrier_error_lock, flags);
+}
+
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
  * Or do dm_get() before calling this function and dm_put() later.
  */
-static void rq_completed(struct mapped_device *md, int run_queue)
+static void rq_completed(struct mapped_device *md, int rw, int run_queue)
 {
-	int wakeup_waiters = 0;
-	struct request_queue *q = md->queue;
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (!queue_in_flight(q))
-		wakeup_waiters = 1;
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	atomic_dec(&md->pending[rw]);
 
 	/* nudge anyone waiting on suspend queue */
-	if (wakeup_waiters)
+	if (!md_in_flight(md))
 		wake_up(&md->wait);
 
 	if (run_queue)
-		blk_run_queue(q);
+		blk_run_queue(md->queue);
 
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
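The error-precedence comment in store_barrier_error() above is compact, so here is the same decision as a standalone userspace sketch (locking dropped, DM_ENDIO_REQUEUE given an assumed stand-in value), together with the cases it is meant to cover:

#include <assert.h>
#include <errno.h>

#define DM_ENDIO_REQUEUE 2	/* assumed stand-in for the dm constant */

/* Same decision as store_barrier_error(): the first error is kept, except
 * that -EOPNOTSUPP beats everything and a requeue beats any plain I/O error.
 */
static int merge_barrier_error(int cur, int new_err)
{
	if (!cur || new_err == -EOPNOTSUPP ||
	    (cur != -EOPNOTSUPP && new_err == DM_ENDIO_REQUEUE))
		return new_err;
	return cur;
}

int main(void)
{
	assert(merge_barrier_error(0, -EIO) == -EIO);		/* first error sticks */
	assert(merge_barrier_error(-EIO, -ENOMEM) == -EIO);	/* later I/O errors do not */
	assert(merge_barrier_error(-EIO, DM_ENDIO_REQUEUE) == DM_ENDIO_REQUEUE);
	assert(merge_barrier_error(-EIO, -EOPNOTSUPP) == -EOPNOTSUPP);
	assert(merge_barrier_error(-EOPNOTSUPP, DM_ENDIO_REQUEUE) == -EOPNOTSUPP);
	return 0;
}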
@@ -753,6 +784,44 @@ static void free_rq_clone(struct request *clone)
 	free_rq_tio(tio);
 }
 
+/*
+ * Complete the clone and the original request.
+ * Must be called without queue lock.
+ */
+static void dm_end_request(struct request *clone, int error)
+{
+	int rw = rq_data_dir(clone);
+	int run_queue = 1;
+	bool is_barrier = blk_barrier_rq(clone);
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
+	struct request *rq = tio->orig;
+
+	if (blk_pc_request(rq) && !is_barrier) {
+		rq->errors = clone->errors;
+		rq->resid_len = clone->resid_len;
+
+		if (rq->sense)
+			/*
+			 * We are using the sense buffer of the original
+			 * request.
+			 * So setting the length of the sense data is enough.
+			 */
+			rq->sense_len = clone->sense_len;
+	}
+
+	free_rq_clone(clone);
+
+	if (unlikely(is_barrier)) {
+		if (unlikely(error))
+			store_barrier_error(md, error);
+		run_queue = 0;
+	} else
+		blk_end_request_all(rq, error);
+
+	rq_completed(md, rw, run_queue);
+}
+
 static void dm_unprep_request(struct request *rq)
 {
 	struct request *clone = rq->special;
@@ -768,12 +837,23 @@ static void dm_unprep_request(struct request *rq)
  */
 void dm_requeue_unmapped_request(struct request *clone)
 {
+	int rw = rq_data_dir(clone);
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
+	if (unlikely(blk_barrier_rq(clone))) {
+		/*
+		 * Barrier clones share an original request.
+		 * Leave it to dm_end_request(), which handles this special
+		 * case.
+		 */
+		dm_end_request(clone, DM_ENDIO_REQUEUE);
+		return;
+	}
+
 	dm_unprep_request(rq);
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -782,7 +862,7 @@ void dm_requeue_unmapped_request(struct request *clone)
 	blk_requeue_request(q, rq);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	rq_completed(md, 0);
+	rq_completed(md, rw, 0);
 }
 EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
 
@@ -815,34 +895,28 @@ static void start_queue(struct request_queue *q)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-/*
- * Complete the clone and the original request.
- * Must be called without queue lock.
- */
-static void dm_end_request(struct request *clone, int error)
+static void dm_done(struct request *clone, int error, bool mapped)
 {
+	int r = error;
 	struct dm_rq_target_io *tio = clone->end_io_data;
-	struct mapped_device *md = tio->md;
-	struct request *rq = tio->orig;
+	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
 
-	if (blk_pc_request(rq)) {
-		rq->errors = clone->errors;
-		rq->resid_len = clone->resid_len;
+	if (mapped && rq_end_io)
+		r = rq_end_io(tio->ti, clone, error, &tio->info);
 
-		if (rq->sense)
-			/*
-			 * We are using the sense buffer of the original
-			 * request.
-			 * So setting the length of the sense data is enough.
-			 */
-			rq->sense_len = clone->sense_len;
+	if (r <= 0)
+		/* The target wants to complete the I/O */
+		dm_end_request(clone, r);
+	else if (r == DM_ENDIO_INCOMPLETE)
+		/* The target will handle the I/O */
+		return;
+	else if (r == DM_ENDIO_REQUEUE)
+		/* The target wants to requeue the I/O */
+		dm_requeue_unmapped_request(clone);
+	else {
+		DMWARN("unimplemented target endio return value: %d", r);
+		BUG();
 	}
-
-	free_rq_clone(clone);
-
-	blk_end_request_all(rq, error);
-
-	rq_completed(md, 1);
 }
 
 /*
@@ -850,27 +924,14 @@ static void dm_end_request(struct request *clone, int error)
  */
 static void dm_softirq_done(struct request *rq)
 {
+	bool mapped = true;
 	struct request *clone = rq->completion_data;
 	struct dm_rq_target_io *tio = clone->end_io_data;
-	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
-	int error = tio->error;
 
-	if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io)
-		error = rq_end_io(tio->ti, clone, error, &tio->info);
+	if (rq->cmd_flags & REQ_FAILED)
+		mapped = false;
 
-	if (error <= 0)
-		/* The target wants to complete the I/O */
-		dm_end_request(clone, error);
-	else if (error == DM_ENDIO_INCOMPLETE)
-		/* The target will handle the I/O */
-		return;
-	else if (error == DM_ENDIO_REQUEUE)
-		/* The target wants to requeue the I/O */
-		dm_requeue_unmapped_request(clone);
-	else {
-		DMWARN("unimplemented target endio return value: %d", error);
-		BUG();
-	}
+	dm_done(clone, tio->error, mapped);
 }
 
 /*
@@ -882,6 +943,19 @@ static void dm_complete_request(struct request *clone, int error)
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct request *rq = tio->orig;
 
+	if (unlikely(blk_barrier_rq(clone))) {
+		/*
+		 * Barrier clones share an original request. So can't use
+		 * softirq_done with the original.
+		 * Pass the clone to dm_done() directly in this special case.
+		 * It is safe (even if clone->q->queue_lock is held here)
+		 * because there is no I/O dispatching during the completion
+		 * of barrier clone.
+		 */
+		dm_done(clone, error, true);
+		return;
+	}
+
 	tio->error = error;
 	rq->completion_data = clone;
 	blk_complete_request(rq);
@@ -898,6 +972,17 @@ void dm_kill_unmapped_request(struct request *clone, int error)
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct request *rq = tio->orig;
 
+	if (unlikely(blk_barrier_rq(clone))) {
+		/*
+		 * Barrier clones share an original request.
+		 * Leave it to dm_end_request(), which handles this special
+		 * case.
+		 */
+		BUG_ON(error > 0);
+		dm_end_request(clone, error);
+		return;
+	}
+
 	rq->cmd_flags |= REQ_FAILED;
 	dm_complete_request(clone, error);
 }
@@ -1214,7 +1299,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	struct clone_info ci;
 	int error = 0;
 
-	ci.map = dm_get_table(md);
+	ci.map = dm_get_live_table(md);
 	if (unlikely(!ci.map)) {
 		if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
 			bio_io_error(bio);
@@ -1255,7 +1340,7 @@ static int dm_merge_bvec(struct request_queue *q,
 			 struct bio_vec *biovec)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *ti;
 	sector_t max_sectors;
 	int max_size = 0;
@@ -1352,11 +1437,6 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct mapped_device *md = q->queuedata;
 
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
-
 	return md->saved_make_request_fn(q, bio); /* call __make_request() */
 }
 
@@ -1375,6 +1455,25 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	return _dm_request(q, bio);
 }
 
+/*
+ * Mark this request as flush request, so that dm_request_fn() can
+ * recognize.
+ */
+static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
+{
+	rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	rq->cmd[0] = REQ_LB_OP_FLUSH;
+}
+
+static bool dm_rq_is_flush_request(struct request *rq)
+{
+	if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	    rq->cmd[0] == REQ_LB_OP_FLUSH)
+		return true;
+	else
+		return false;
+}
+
 void dm_dispatch_request(struct request *rq)
 {
 	int r;
@@ -1420,25 +1519,54 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
 static int setup_clone(struct request *clone, struct request *rq,
 		       struct dm_rq_target_io *tio)
 {
-	int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
-				  dm_rq_bio_constructor, tio);
+	int r;
 
-	if (r)
-		return r;
+	if (dm_rq_is_flush_request(rq)) {
+		blk_rq_init(NULL, clone);
+		clone->cmd_type = REQ_TYPE_FS;
+		clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
+	} else {
+		r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+				      dm_rq_bio_constructor, tio);
+		if (r)
+			return r;
+
+		clone->cmd = rq->cmd;
+		clone->cmd_len = rq->cmd_len;
+		clone->sense = rq->sense;
+		clone->buffer = rq->buffer;
+	}
 
-	clone->cmd = rq->cmd;
-	clone->cmd_len = rq->cmd_len;
-	clone->sense = rq->sense;
-	clone->buffer = rq->buffer;
 	clone->end_io = end_clone_request;
 	clone->end_io_data = tio;
 
 	return 0;
 }
 
-static int dm_rq_flush_suspending(struct mapped_device *md)
+static struct request *clone_rq(struct request *rq, struct mapped_device *md,
+				gfp_t gfp_mask)
 {
-	return !md->suspend_rq.special;
+	struct request *clone;
+	struct dm_rq_target_io *tio;
+
+	tio = alloc_rq_tio(md, gfp_mask);
+	if (!tio)
+		return NULL;
+
+	tio->md = md;
+	tio->ti = NULL;
+	tio->orig = rq;
+	tio->error = 0;
+	memset(&tio->info, 0, sizeof(tio->info));
+
+	clone = &tio->clone;
+	if (setup_clone(clone, rq, tio)) {
+		/* -ENOMEM */
+		free_rq_tio(tio);
+		return NULL;
+	}
+
+	return clone;
 }
 
 /*
@@ -1447,39 +1575,19 @@ static int dm_rq_flush_suspending(struct mapped_device *md)
 static int dm_prep_fn(struct request_queue *q, struct request *rq)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_rq_target_io *tio;
 	struct request *clone;
 
-	if (unlikely(rq == &md->suspend_rq)) {
-		if (dm_rq_flush_suspending(md))
-			return BLKPREP_OK;
-		else
-			/* The flush suspend was interrupted */
-			return BLKPREP_KILL;
-	}
+	if (unlikely(dm_rq_is_flush_request(rq)))
+		return BLKPREP_OK;
 
 	if (unlikely(rq->special)) {
 		DMWARN("Already has something in rq->special.");
 		return BLKPREP_KILL;
 	}
 
-	tio = alloc_rq_tio(md); /* Only one for each original request */
-	if (!tio)
-		/* -ENOMEM */
-		return BLKPREP_DEFER;
-
-	tio->md = md;
-	tio->ti = NULL;
-	tio->orig = rq;
-	tio->error = 0;
-	memset(&tio->info, 0, sizeof(tio->info));
-
-	clone = &tio->clone;
-	if (setup_clone(clone, rq, tio)) {
-		/* -ENOMEM */
-		free_rq_tio(tio);
+	clone = clone_rq(rq, md, GFP_ATOMIC);
+	if (!clone)
 		return BLKPREP_DEFER;
-	}
 
 	rq->special = clone;
 	rq->cmd_flags |= REQ_DONTPREP;
@@ -1487,11 +1595,10 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
 	return BLKPREP_OK;
 }
 
-static void map_request(struct dm_target *ti, struct request *rq,
+static void map_request(struct dm_target *ti, struct request *clone,
 			struct mapped_device *md)
 {
 	int r;
-	struct request *clone = rq->special;
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
 	/*
@@ -1511,6 +1618,8 @@ static void map_request(struct dm_target *ti, struct request *rq,
 		break;
 	case DM_MAPIO_REMAPPED:
 		/* The target has remapped the I/O so dispatch it */
+		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
+				     blk_rq_pos(tio->orig));
 		dm_dispatch_request(clone);
 		break;
 	case DM_MAPIO_REQUEUE:
@@ -1536,29 +1645,26 @@ static void map_request(struct dm_target *ti, struct request *rq,
 static void dm_request_fn(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *ti;
-	struct request *rq;
+	struct request *rq, *clone;
 
 	/*
-	 * For noflush suspend, check blk_queue_stopped() to immediately
-	 * quit I/O dispatching.
+	 * For suspend, check blk_queue_stopped() and increment
+	 * ->pending within a single queue_lock not to increment the
+	 * number of in-flight I/Os after the queue is stopped in
+	 * dm_suspend().
 	 */
 	while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
 		rq = blk_peek_request(q);
 		if (!rq)
 			goto plug_and_out;
 
-		if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend maker */
-			if (queue_in_flight(q))
-				/* Not quiet yet. Wait more */
-				goto plug_and_out;
-
-			/* This device should be quiet now */
-			__stop_queue(q);
+		if (unlikely(dm_rq_is_flush_request(rq))) {
+			BUG_ON(md->flush_request);
+			md->flush_request = rq;
 			blk_start_request(rq);
-			__blk_end_request_all(rq, 0);
-			wake_up(&md->wait);
+			queue_work(md->wq, &md->barrier_work);
 			goto out;
 		}
 
@@ -1567,8 +1673,11 @@ static void dm_request_fn(struct request_queue *q)
 			goto plug_and_out;
 
 		blk_start_request(rq);
+		clone = rq->special;
+		atomic_inc(&md->pending[rq_data_dir(clone)]);
+
 		spin_unlock(q->queue_lock);
-		map_request(ti, rq, md);
+		map_request(ti, clone, md);
 		spin_lock_irq(q->queue_lock);
 	}
 
@@ -1595,7 +1704,7 @@ static int dm_lld_busy(struct request_queue *q)
 {
 	int r;
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 
 	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
 		r = 1;
@@ -1610,7 +1719,7 @@ static int dm_lld_busy(struct request_queue *q)
 static void dm_unplug_all(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 
 	if (map) {
 		if (dm_request_based(md))
@@ -1628,7 +1737,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	struct dm_table *map;
 
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-		map = dm_get_table(md);
+		map = dm_get_live_table(md);
 		if (map) {
 			/*
 			 * Request-based dm cares about only own queue for
@@ -1725,6 +1834,7 @@ out:
 static const struct block_device_operations dm_blk_dops;
 
 static void dm_wq_work(struct work_struct *work);
+static void dm_rq_barrier_work(struct work_struct *work);
 
 /*
  * Allocate and initialise a blank device with a given minor.
@@ -1754,6 +1864,7 @@ static struct mapped_device *alloc_dev(int minor)
 	init_rwsem(&md->io_lock);
 	mutex_init(&md->suspend_lock);
 	spin_lock_init(&md->deferred_lock);
+	spin_lock_init(&md->barrier_error_lock);
 	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
@@ -1788,6 +1899,8 @@ static struct mapped_device *alloc_dev(int minor)
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 	blk_queue_lld_busy(md->queue, dm_lld_busy);
+	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
+			  dm_rq_prepare_flush);
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
@@ -1797,6 +1910,7 @@ static struct mapped_device *alloc_dev(int minor)
 	atomic_set(&md->pending[1], 0);
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
+	INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
 	init_waitqueue_head(&md->eventq);
 
 	md->disk->major = _major;
@@ -1921,9 +2035,13 @@ static void __set_size(struct mapped_device *md, sector_t size)
 	mutex_unlock(&md->bdev->bd_inode->i_mutex);
 }
 
-static int __bind(struct mapped_device *md, struct dm_table *t,
-		  struct queue_limits *limits)
+/*
+ * Returns old map, which caller must destroy.
+ */
+static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
+			       struct queue_limits *limits)
 {
+	struct dm_table *old_map;
 	struct request_queue *q = md->queue;
 	sector_t size;
 	unsigned long flags;
@@ -1938,11 +2056,6 @@ static int __bind(struct mapped_device *md, struct dm_table *t,
 
 	__set_size(md, size);
 
-	if (!size) {
-		dm_table_destroy(t);
-		return 0;
-	}
-
 	dm_table_event_callback(t, event_callback, md);
 
 	/*
@@ -1958,26 +2071,31 @@ static int __bind(struct mapped_device *md, struct dm_table *t,
 	__bind_mempools(md, t);
 
 	write_lock_irqsave(&md->map_lock, flags);
+	old_map = md->map;
 	md->map = t;
 	dm_table_set_restrictions(t, q, limits);
 	write_unlock_irqrestore(&md->map_lock, flags);
 
-	return 0;
+	return old_map;
 }
 
-static void __unbind(struct mapped_device *md)
+/*
+ * Returns unbound table for the caller to free.
+ */
+static struct dm_table *__unbind(struct mapped_device *md)
 {
 	struct dm_table *map = md->map;
 	unsigned long flags;
 
 	if (!map)
-		return;
+		return NULL;
 
 	dm_table_event_callback(map, NULL, NULL);
 	write_lock_irqsave(&md->map_lock, flags);
 	md->map = NULL;
 	write_unlock_irqrestore(&md->map_lock, flags);
-	dm_table_destroy(map);
+
+	return map;
 }
 
 /*
@@ -2059,18 +2177,18 @@ void dm_put(struct mapped_device *md)
 	BUG_ON(test_bit(DMF_FREEING, &md->flags));
 
 	if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
-		map = dm_get_table(md);
+		map = dm_get_live_table(md);
 		idr_replace(&_minor_idr, MINOR_ALLOCED,
 			    MINOR(disk_devt(dm_disk(md))));
 		set_bit(DMF_FREEING, &md->flags);
 		spin_unlock(&_minor_lock);
-		if (!dm_suspended(md)) {
+		if (!dm_suspended_md(md)) {
 			dm_table_presuspend_targets(map);
 			dm_table_postsuspend_targets(map);
 		}
 		dm_sysfs_exit(md);
 		dm_table_put(map);
-		__unbind(md);
+		dm_table_destroy(__unbind(md));
 		free_dev(md);
 	}
 }
@@ -2080,8 +2198,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 {
 	int r = 0;
 	DECLARE_WAITQUEUE(wait, current);
-	struct request_queue *q = md->queue;
-	unsigned long flags;
 
 	dm_unplug_all(md->queue);
 
@@ -2091,15 +2207,7 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 		set_current_state(interruptible);
 
 		smp_mb();
-		if (dm_request_based(md)) {
-			spin_lock_irqsave(q->queue_lock, flags);
-			if (!queue_in_flight(q) && blk_queue_stopped(q)) {
-				spin_unlock_irqrestore(q->queue_lock, flags);
-				break;
-			}
-			spin_unlock_irqrestore(q->queue_lock, flags);
-		} else if (!atomic_read(&md->pending[0]) &&
-			   !atomic_read(&md->pending[1]))
+		if (!md_in_flight(md))
 			break;
 
 		if (interruptible == TASK_INTERRUPTIBLE &&
@@ -2194,98 +2302,106 @@ static void dm_queue_flush(struct mapped_device *md)
 	queue_work(md->wq, &md->work);
 }
 
-/*
- * Swap in a new table (destroying old one).
- */
-int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
 {
-	struct queue_limits limits;
-	int r = -EINVAL;
+	struct dm_rq_target_io *tio = clone->end_io_data;
 
-	mutex_lock(&md->suspend_lock);
+	tio->info.flush_request = flush_nr;
+}
 
-	/* device must be suspended */
-	if (!dm_suspended(md))
-		goto out;
+/* Issue barrier requests to targets and wait for their completion. */
+static int dm_rq_barrier(struct mapped_device *md)
+{
+	int i, j;
+	struct dm_table *map = dm_get_live_table(md);
+	unsigned num_targets = dm_table_get_num_targets(map);
+	struct dm_target *ti;
+	struct request *clone;
 
-	r = dm_calculate_queue_limits(table, &limits);
-	if (r)
-		goto out;
+	md->barrier_error = 0;
 
-	/* cannot change the device type, once a table is bound */
-	if (md->map &&
-	    (dm_table_get_type(md->map) != dm_table_get_type(table))) {
-		DMWARN("can't change the device type after a table is bound");
-		goto out;
+	for (i = 0; i < num_targets; i++) {
+		ti = dm_table_get_target(map, i);
+		for (j = 0; j < ti->num_flush_requests; j++) {
+			clone = clone_rq(md->flush_request, md, GFP_NOIO);
+			dm_rq_set_flush_nr(clone, j);
+			atomic_inc(&md->pending[rq_data_dir(clone)]);
+			map_request(ti, clone, md);
+		}
 	}
 
-	__unbind(md);
-	r = __bind(md, table, &limits);
-
-out:
-	mutex_unlock(&md->suspend_lock);
-	return r;
-}
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+	dm_table_put(map);
 
-static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
-{
-	md->suspend_rq.special = (void *)0x1;
+	return md->barrier_error;
 }
 
-static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
+static void dm_rq_barrier_work(struct work_struct *work)
 {
+	int error;
+	struct mapped_device *md = container_of(work, struct mapped_device,
+						barrier_work);
 	struct request_queue *q = md->queue;
+	struct request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (!noflush)
-		dm_rq_invalidate_suspend_marker(md);
-	__start_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
+	/*
+	 * Hold the md reference here and leave it at the last part so that
+	 * the md can't be deleted by device opener when the barrier request
+	 * completes.
+	 */
+	dm_get(md);
 
-static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
-{
-	struct request *rq = &md->suspend_rq;
-	struct request_queue *q = md->queue;
+	error = dm_rq_barrier(md);
 
-	if (noflush)
-		stop_queue(q);
-	else {
-		blk_rq_init(q, rq);
-		blk_insert_request(q, rq, 0, NULL);
-	}
+	rq = md->flush_request;
+	md->flush_request = NULL;
+
+	if (error == DM_ENDIO_REQUEUE) {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_requeue_request(q, rq);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	} else
+		blk_end_request_all(rq, error);
+
+	blk_run_queue(q);
+
+	dm_put(md);
 }
 
-static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
+/*
+ * Swap in a new table, returning the old one for the caller to destroy.
+ */
+struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
-	int r = 1;
-	struct request *rq = &md->suspend_rq;
-	struct request_queue *q = md->queue;
-	unsigned long flags;
+	struct dm_table *map = ERR_PTR(-EINVAL);
+	struct queue_limits limits;
+	int r;
 
-	if (noflush)
-		return r;
+	mutex_lock(&md->suspend_lock);
 
-	/* The marker must be protected by queue lock if it is in use */
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (unlikely(rq->ref_count)) {
-		/*
-		 * This can happen, when the previous flush suspend was
-		 * interrupted, the marker is still in the queue and
-		 * this flush suspend has been invoked, because we don't
-		 * remove the marker at the time of suspend interruption.
-		 * We have only one marker per mapped_device, so we can't
-		 * start another flush suspend while it is in use.
-		 */
-		BUG_ON(!rq->special); /* The marker should be invalidated */
-		DMWARN("Invalidating the previous flush suspend is still in"
-		       " progress. Please retry later.");
-		r = 0;
+	/* device must be suspended */
+	if (!dm_suspended_md(md))
+		goto out;
+
+	r = dm_calculate_queue_limits(table, &limits);
+	if (r) {
+		map = ERR_PTR(r);
+		goto out;
 	}
-	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	return r;
+	/* cannot change the device type, once a table is bound */
+	if (md->map &&
+	    (dm_table_get_type(md->map) != dm_table_get_type(table))) {
+		DMWARN("can't change the device type after a table is bound");
+		goto out;
+	}
+
+	map = __bind(md, table, &limits);
+
+out:
+	mutex_unlock(&md->suspend_lock);
+	return map;
 }
 
 /*
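The reworked dm_swap_table() above changes the calling convention: instead of an int, it returns either the old table (which the caller must destroy, as dm_put() now does via dm_table_destroy(__unbind(md))) or an ERR_PTR-encoded errno. A minimal userspace sketch of that convention, with stand-in ERR_PTR helpers written in the spirit of the kernel's and a hypothetical swap_table() in place of the real function:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Userspace stand-ins for the kernel's ERR_PTR helpers, for illustration only. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct table { const char *name; };

/* Hypothetical stand-in for dm_swap_table(): returns the old table on success
 * or an encoded error, mirroring the contract introduced by this patch.
 */
static struct table *swap_table(struct table **live, struct table *new_table,
				int device_suspended)
{
	struct table *old;

	if (!device_suspended)
		return ERR_PTR(-EINVAL);	/* same errno the patch uses */

	old = *live;
	*live = new_table;
	return old;				/* caller must destroy this */
}

int main(void)
{
	struct table a = { "old" }, b = { "new" };
	struct table *live = &a;
	struct table *old = swap_table(&live, &b, 0);

	if (IS_ERR(old))
		printf("swap refused: errno %ld\n", -PTR_ERR(old));

	old = swap_table(&live, &b, 1);
	if (!IS_ERR(old))
		printf("now live: %s, to destroy: %s\n", live->name, old->name);
	return 0;
}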
@@ -2330,49 +2446,11 @@ static void unlock_fs(struct mapped_device *md)
 /*
  * Suspend mechanism in request-based dm.
  *
- * After the suspend starts, further incoming requests are kept in
- * the request_queue and deferred.
- * Remaining requests in the request_queue at the start of suspend are flushed
- * if it is flush suspend.
- * The suspend completes when the following conditions have been satisfied,
- * so wait for it:
- * 1. q->in_flight is 0 (which means no in_flight request)
- * 2. queue has been stopped (which means no request dispatching)
- *
+ * 1. Flush all I/Os by lock_fs() if needed.
+ * 2. Stop dispatching any I/O by stopping the request_queue.
+ * 3. Wait for all in-flight I/Os to be completed or requeued.
  *
- * Noflush suspend
- * ---------------
- * Noflush suspend doesn't need to dispatch remaining requests.
- * So stop the queue immediately. Then, wait for all in_flight requests
- * to be completed or requeued.
- *
- * To abort noflush suspend, start the queue.
- *
- *
- * Flush suspend
- * -------------
- * Flush suspend needs to dispatch remaining requests. So stop the queue
- * after the remaining requests are completed. (Requeued request must be also
- * re-dispatched and completed. Until then, we can't stop the queue.)
- *
- * During flushing the remaining requests, further incoming requests are also
- * inserted to the same queue. To distinguish which requests are to be
- * flushed, we insert a marker request to the queue at the time of starting
- * flush suspend, like a barrier.
- * The dispatching is blocked when the marker is found on the top of the queue.
- * And the queue is stopped when all in_flight requests are completed, since
- * that means the remaining requests are completely flushed.
- * Then, the marker is removed from the queue.
- *
- * To abort flush suspend, we also need to take care of the marker, not only
- * starting the queue.
- * We don't remove the marker forcibly from the queue since it's against
- * the block-layer manner. Instead, we put a invalidated mark on the marker.
- * When the invalidated marker is found on the top of the queue, it is
- * immediately removed from the queue, so it doesn't block dispatching.
- * Because we have only one marker per mapped_device, we can't start another
- * flush suspend until the invalidated marker is removed from the queue.
- * So fail and return with -EBUSY in such a case.
+ * To abort suspend, start the request_queue.
  */
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
@@ -2383,17 +2461,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 	mutex_lock(&md->suspend_lock);
 
-	if (dm_suspended(md)) {
+	if (dm_suspended_md(md)) {
 		r = -EINVAL;
 		goto out_unlock;
 	}
 
-	if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
-		r = -EBUSY;
-		goto out_unlock;
-	}
-
-	map = dm_get_table(md);
+	map = dm_get_live_table(md);
 
 	/*
 	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2406,8 +2479,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	dm_table_presuspend_targets(map);
 
 	/*
-	 * Flush I/O to the device. noflush supersedes do_lockfs,
-	 * because lock_fs() needs to flush I/Os.
+	 * Flush I/O to the device.
+	 * Any I/O submitted after lock_fs() may not be flushed.
+	 * noflush takes precedence over do_lockfs.
+	 * (lock_fs() flushes I/Os and waits for them to complete.)
 	 */
 	if (!noflush && do_lockfs) {
 		r = lock_fs(md);
@@ -2436,10 +2511,15 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
 	up_write(&md->io_lock);
 
-	flush_workqueue(md->wq);
-
+	/*
+	 * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
+	 * can be kicked until md->queue is stopped. So stop md->queue before
+	 * flushing md->wq.
+	 */
 	if (dm_request_based(md))
-		dm_rq_start_suspend(md, noflush);
+		stop_queue(md->queue);
+
+	flush_workqueue(md->wq);
 
 	/*
 	 * At this point no more requests are entering target request routines.
@@ -2458,7 +2538,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		dm_queue_flush(md);
 
 		if (dm_request_based(md))
-			dm_rq_abort_suspend(md, noflush);
+			start_queue(md->queue);
 
 		unlock_fs(md);
 		goto out; /* pushback list is already flushed, so skip flush */
@@ -2470,10 +2550,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	 * requests are being added to md->deferred list.
 	 */
 
-	dm_table_postsuspend_targets(map);
-
 	set_bit(DMF_SUSPENDED, &md->flags);
 
+	dm_table_postsuspend_targets(map);
+
 out:
 	dm_table_put(map);
 
@@ -2488,10 +2568,10 @@ int dm_resume(struct mapped_device *md)
 	struct dm_table *map = NULL;
 
 	mutex_lock(&md->suspend_lock);
-	if (!dm_suspended(md))
+	if (!dm_suspended_md(md))
 		goto out;
 
-	map = dm_get_table(md);
+	map = dm_get_live_table(md);
 	if (!map || !dm_table_get_size(map))
 		goto out;
 
@@ -2592,18 +2672,29 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
 		return NULL;
 
 	if (test_bit(DMF_FREEING, &md->flags) ||
-	    test_bit(DMF_DELETING, &md->flags))
+	    dm_deleting_md(md))
 		return NULL;
 
 	dm_get(md);
 	return md;
 }
 
-int dm_suspended(struct mapped_device *md)
+int dm_suspended_md(struct mapped_device *md)
 {
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+int dm_suspended(struct dm_target *ti)
+{
+	struct mapped_device *md = dm_table_get_md(ti->table);
+	int r = dm_suspended_md(md);
+
+	dm_put(md);
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_suspended);
+
 int dm_noflush_suspending(struct dm_target *ti)
 {
 	struct mapped_device *md = dm_table_get_md(ti->table);