Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--  drivers/md/dm.c | 673
1 file changed, 384 insertions(+), 289 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 724efc63904d..d21e1284604f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -143,9 +143,19 @@ struct mapped_device {
 	int barrier_error;
 
 	/*
+	 * Protect barrier_error from concurrent endio processing
+	 * in request-based dm.
+	 */
+	spinlock_t barrier_error_lock;
+
+	/*
 	 * Processing queue (flush/barriers)
 	 */
 	struct workqueue_struct *wq;
+	struct work_struct barrier_work;
+
+	/* A pointer to the currently processing pre/post flush request */
+	struct request *flush_request;
 
 	/*
 	 * The current mapping.
@@ -178,9 +188,6 @@ struct mapped_device {
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
-	/* marker of flush suspend for request-based dm */
-	struct request suspend_rq;
-
 	/* For saving the address of __make_request for request based dm */
 	make_request_fn *saved_make_request_fn;
 
@@ -275,6 +282,7 @@ static int (*_inits[])(void) __initdata = {
 	dm_target_init,
 	dm_linear_init,
 	dm_stripe_init,
+	dm_io_init,
 	dm_kcopyd_init,
 	dm_interface_init,
 };
@@ -284,6 +292,7 @@ static void (*_exits[])(void) = {
 	dm_target_exit,
 	dm_linear_exit,
 	dm_stripe_exit,
+	dm_io_exit,
 	dm_kcopyd_exit,
 	dm_interface_exit,
 };
@@ -320,6 +329,11 @@ static void __exit dm_exit(void)
 /*
  * Block device functions
  */
+int dm_deleting_md(struct mapped_device *md)
+{
+	return test_bit(DMF_DELETING, &md->flags);
+}
+
 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 {
 	struct mapped_device *md;
@@ -331,7 +345,7 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 		goto out;
 
 	if (test_bit(DMF_FREEING, &md->flags) ||
-	    test_bit(DMF_DELETING, &md->flags)) {
+	    dm_deleting_md(md)) {
 		md = NULL;
 		goto out;
 	}
@@ -388,7 +402,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *tgt;
 	int r = -ENOTTY;
 
@@ -401,7 +415,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 
 	tgt = dm_table_get_target(map, 0);
 
-	if (dm_suspended(md)) {
+	if (dm_suspended_md(md)) {
 		r = -EAGAIN;
 		goto out;
 	}
@@ -430,9 +444,10 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 	mempool_free(tio, md->tio_pool);
 }
 
-static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
+static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
+					    gfp_t gfp_mask)
 {
-	return mempool_alloc(md->tio_pool, GFP_ATOMIC);
+	return mempool_alloc(md->tio_pool, gfp_mask);
 }
 
 static void free_rq_tio(struct dm_rq_target_io *tio)
@@ -450,6 +465,12 @@ static void free_bio_info(struct dm_rq_clone_bio_info *info)
 	mempool_free(info, info->tio->md->io_pool);
 }
 
+static int md_in_flight(struct mapped_device *md)
+{
+	return atomic_read(&md->pending[READ]) +
+	       atomic_read(&md->pending[WRITE]);
+}
+
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
@@ -512,7 +533,7 @@ static void queue_io(struct mapped_device *md, struct bio *bio)
 * function to access the md->map field, and make sure they call
 * dm_table_put() when finished.
 */
-struct dm_table *dm_get_table(struct mapped_device *md)
+struct dm_table *dm_get_live_table(struct mapped_device *md)
 {
 	struct dm_table *t;
 	unsigned long flags;
@@ -614,8 +635,10 @@ static void dec_pending(struct dm_io *io, int error)
 			if (!md->barrier_error && io_error != -EOPNOTSUPP)
 				md->barrier_error = io_error;
 			end_io_acct(io);
+			free_io(md, io);
 		} else {
 			end_io_acct(io);
+			free_io(md, io);
 
 			if (io_error != DM_ENDIO_REQUEUE) {
 				trace_block_bio_complete(md->queue, bio);
@@ -623,8 +646,6 @@ static void dec_pending(struct dm_io *io, int error)
 				bio_endio(bio, io_error);
 			}
 		}
-
-		free_io(md, io);
 	}
 }
 
@@ -716,28 +737,38 @@ static void end_clone_bio(struct bio *clone, int error)
 	blk_update_request(tio->orig, 0, nr_bytes);
 }
 
+static void store_barrier_error(struct mapped_device *md, int error)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&md->barrier_error_lock, flags);
+	/*
+	 * Basically, the first error is taken, but:
+	 * -EOPNOTSUPP supersedes any I/O error.
+	 * Requeue request supersedes any I/O error but -EOPNOTSUPP.
+	 */
+	if (!md->barrier_error || error == -EOPNOTSUPP ||
+	    (md->barrier_error != -EOPNOTSUPP &&
+	     error == DM_ENDIO_REQUEUE))
+		md->barrier_error = error;
+	spin_unlock_irqrestore(&md->barrier_error_lock, flags);
+}
+
 /*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */
-static void rq_completed(struct mapped_device *md, int run_queue)
+static void rq_completed(struct mapped_device *md, int rw, int run_queue)
 {
-	int wakeup_waiters = 0;
-	struct request_queue *q = md->queue;
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (!queue_in_flight(q))
-		wakeup_waiters = 1;
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	atomic_dec(&md->pending[rw]);
 
 	/* nudge anyone waiting on suspend queue */
-	if (wakeup_waiters)
+	if (!md_in_flight(md))
 		wake_up(&md->wait);
 
 	if (run_queue)
-		blk_run_queue(q);
+		blk_run_queue(md->queue);
 
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
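The error precedence that store_barrier_error() applies above is easy to misread, so here is a small stand-alone model of it in plain userspace C. This is an illustration only, not code from the patch; the numeric values chosen for EOPNOTSUPP and DM_ENDIO_REQUEUE are stand-ins, and only the ordering they demonstrate matters: -EOPNOTSUPP wins over everything, a requeue wins over any plain I/O error, and among plain I/O errors the first one recorded is kept.

#include <stdio.h>

#define EOPNOTSUPP		95	/* stand-in value, illustration only */
#define DM_ENDIO_REQUEUE	1	/* stand-in value, illustration only */

static int barrier_error;

/* Same condition as store_barrier_error(), minus the md and the spinlock. */
static void model_store_barrier_error(int error)
{
	if (!barrier_error || error == -EOPNOTSUPP ||
	    (barrier_error != -EOPNOTSUPP && error == DM_ENDIO_REQUEUE))
		barrier_error = error;
}

int main(void)
{
	model_store_barrier_error(-5);			/* first I/O error: recorded */
	model_store_barrier_error(-28);			/* later I/O error: ignored */
	model_store_barrier_error(DM_ENDIO_REQUEUE);	/* requeue: overrides -5 */
	model_store_barrier_error(-EOPNOTSUPP);		/* strongest: final value */
	printf("final barrier_error = %d\n", barrier_error);	/* prints -95 */
	return 0;
}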
@@ -753,6 +784,44 @@ static void free_rq_clone(struct request *clone)
 	free_rq_tio(tio);
 }
 
+/*
+ * Complete the clone and the original request.
+ * Must be called without queue lock.
+ */
+static void dm_end_request(struct request *clone, int error)
+{
+	int rw = rq_data_dir(clone);
+	int run_queue = 1;
+	bool is_barrier = blk_barrier_rq(clone);
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
+	struct request *rq = tio->orig;
+
+	if (blk_pc_request(rq) && !is_barrier) {
+		rq->errors = clone->errors;
+		rq->resid_len = clone->resid_len;
+
+		if (rq->sense)
+			/*
+			 * We are using the sense buffer of the original
+			 * request.
+			 * So setting the length of the sense data is enough.
+			 */
+			rq->sense_len = clone->sense_len;
+	}
+
+	free_rq_clone(clone);
+
+	if (unlikely(is_barrier)) {
+		if (unlikely(error))
+			store_barrier_error(md, error);
+		run_queue = 0;
+	} else
+		blk_end_request_all(rq, error);
+
+	rq_completed(md, rw, run_queue);
+}
+
 static void dm_unprep_request(struct request *rq)
 {
 	struct request *clone = rq->special;
@@ -768,12 +837,23 @@ static void dm_unprep_request(struct request *rq)
 */
 void dm_requeue_unmapped_request(struct request *clone)
 {
+	int rw = rq_data_dir(clone);
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
+	if (unlikely(blk_barrier_rq(clone))) {
+		/*
+		 * Barrier clones share an original request.
+		 * Leave it to dm_end_request(), which handles this special
+		 * case.
+		 */
+		dm_end_request(clone, DM_ENDIO_REQUEUE);
+		return;
+	}
+
 	dm_unprep_request(rq);
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -782,7 +862,7 @@ void dm_requeue_unmapped_request(struct request *clone)
 	blk_requeue_request(q, rq);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	rq_completed(md, 0);
+	rq_completed(md, rw, 0);
 }
 EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
 
@@ -815,34 +895,28 @@ static void start_queue(struct request_queue *q)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-/*
- * Complete the clone and the original request.
- * Must be called without queue lock.
- */
-static void dm_end_request(struct request *clone, int error)
+static void dm_done(struct request *clone, int error, bool mapped)
 {
+	int r = error;
 	struct dm_rq_target_io *tio = clone->end_io_data;
-	struct mapped_device *md = tio->md;
-	struct request *rq = tio->orig;
+	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
 
-	if (blk_pc_request(rq)) {
-		rq->errors = clone->errors;
-		rq->resid_len = clone->resid_len;
+	if (mapped && rq_end_io)
+		r = rq_end_io(tio->ti, clone, error, &tio->info);
 
-		if (rq->sense)
-			/*
-			 * We are using the sense buffer of the original
-			 * request.
-			 * So setting the length of the sense data is enough.
-			 */
-			rq->sense_len = clone->sense_len;
+	if (r <= 0)
+		/* The target wants to complete the I/O */
+		dm_end_request(clone, r);
+	else if (r == DM_ENDIO_INCOMPLETE)
+		/* The target will handle the I/O */
+		return;
+	else if (r == DM_ENDIO_REQUEUE)
+		/* The target wants to requeue the I/O */
+		dm_requeue_unmapped_request(clone);
+	else {
+		DMWARN("unimplemented target endio return value: %d", r);
+		BUG();
 	}
-
-	free_rq_clone(clone);
-
-	blk_end_request_all(rq, error);
-
-	rq_completed(md, 1);
 }
 
 /*
@@ -850,27 +924,14 @@ static void dm_end_request(struct request *clone, int error)
 */
 static void dm_softirq_done(struct request *rq)
 {
+	bool mapped = true;
 	struct request *clone = rq->completion_data;
 	struct dm_rq_target_io *tio = clone->end_io_data;
-	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
-	int error = tio->error;
 
-	if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io)
-		error = rq_end_io(tio->ti, clone, error, &tio->info);
+	if (rq->cmd_flags & REQ_FAILED)
+		mapped = false;
 
-	if (error <= 0)
-		/* The target wants to complete the I/O */
-		dm_end_request(clone, error);
-	else if (error == DM_ENDIO_INCOMPLETE)
-		/* The target will handle the I/O */
-		return;
-	else if (error == DM_ENDIO_REQUEUE)
-		/* The target wants to requeue the I/O */
-		dm_requeue_unmapped_request(clone);
-	else {
-		DMWARN("unimplemented target endio return value: %d", error);
-		BUG();
-	}
+	dm_done(clone, tio->error, mapped);
 }
 
 /*
@@ -882,6 +943,19 @@ static void dm_complete_request(struct request *clone, int error)
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct request *rq = tio->orig;
 
+	if (unlikely(blk_barrier_rq(clone))) {
+		/*
+		 * Barrier clones share an original request. So can't use
+		 * softirq_done with the original.
+		 * Pass the clone to dm_done() directly in this special case.
+		 * It is safe (even if clone->q->queue_lock is held here)
+		 * because there is no I/O dispatching during the completion
+		 * of barrier clone.
+		 */
+		dm_done(clone, error, true);
+		return;
+	}
+
 	tio->error = error;
 	rq->completion_data = clone;
 	blk_complete_request(rq);
@@ -898,6 +972,17 @@ void dm_kill_unmapped_request(struct request *clone, int error)
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct request *rq = tio->orig;
 
+	if (unlikely(blk_barrier_rq(clone))) {
+		/*
+		 * Barrier clones share an original request.
+		 * Leave it to dm_end_request(), which handles this special
+		 * case.
+		 */
+		BUG_ON(error > 0);
+		dm_end_request(clone, error);
+		return;
+	}
+
 	rq->cmd_flags |= REQ_FAILED;
 	dm_complete_request(clone, error);
 }
@@ -1214,7 +1299,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	struct clone_info ci;
 	int error = 0;
 
-	ci.map = dm_get_table(md);
+	ci.map = dm_get_live_table(md);
 	if (unlikely(!ci.map)) {
 		if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
 			bio_io_error(bio);
@@ -1255,7 +1340,7 @@ static int dm_merge_bvec(struct request_queue *q,
 			 struct bio_vec *biovec)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *ti;
 	sector_t max_sectors;
 	int max_size = 0;
@@ -1352,11 +1437,6 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct mapped_device *md = q->queuedata;
 
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
-
 	return md->saved_make_request_fn(q, bio); /* call __make_request() */
 }
 
@@ -1375,6 +1455,25 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	return _dm_request(q, bio);
 }
 
+/*
+ * Mark this request as flush request, so that dm_request_fn() can
+ * recognize.
+ */
+static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
+{
+	rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	rq->cmd[0] = REQ_LB_OP_FLUSH;
+}
+
+static bool dm_rq_is_flush_request(struct request *rq)
+{
+	if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	    rq->cmd[0] == REQ_LB_OP_FLUSH)
+		return true;
+	else
+		return false;
+}
+
 void dm_dispatch_request(struct request *rq)
 {
 	int r;
@@ -1420,25 +1519,54 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
 static int setup_clone(struct request *clone, struct request *rq,
 		       struct dm_rq_target_io *tio)
 {
-	int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
-				  dm_rq_bio_constructor, tio);
+	int r;
 
-	if (r)
-		return r;
+	if (dm_rq_is_flush_request(rq)) {
+		blk_rq_init(NULL, clone);
+		clone->cmd_type = REQ_TYPE_FS;
+		clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
+	} else {
+		r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+				      dm_rq_bio_constructor, tio);
+		if (r)
+			return r;
+
+		clone->cmd = rq->cmd;
+		clone->cmd_len = rq->cmd_len;
+		clone->sense = rq->sense;
+		clone->buffer = rq->buffer;
+	}
 
-	clone->cmd = rq->cmd;
-	clone->cmd_len = rq->cmd_len;
-	clone->sense = rq->sense;
-	clone->buffer = rq->buffer;
 	clone->end_io = end_clone_request;
 	clone->end_io_data = tio;
 
 	return 0;
 }
 
-static int dm_rq_flush_suspending(struct mapped_device *md)
+static struct request *clone_rq(struct request *rq, struct mapped_device *md,
+				gfp_t gfp_mask)
 {
-	return !md->suspend_rq.special;
+	struct request *clone;
+	struct dm_rq_target_io *tio;
+
+	tio = alloc_rq_tio(md, gfp_mask);
+	if (!tio)
+		return NULL;
+
+	tio->md = md;
+	tio->ti = NULL;
+	tio->orig = rq;
+	tio->error = 0;
+	memset(&tio->info, 0, sizeof(tio->info));
+
+	clone = &tio->clone;
+	if (setup_clone(clone, rq, tio)) {
+		/* -ENOMEM */
+		free_rq_tio(tio);
+		return NULL;
+	}
+
+	return clone;
 }
 
 /*
@@ -1447,51 +1575,35 @@ static int dm_rq_flush_suspending(struct mapped_device *md)
 static int dm_prep_fn(struct request_queue *q, struct request *rq)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_rq_target_io *tio;
 	struct request *clone;
 
-	if (unlikely(rq == &md->suspend_rq)) {
-		if (dm_rq_flush_suspending(md))
-			return BLKPREP_OK;
-		else
-			/* The flush suspend was interrupted */
-			return BLKPREP_KILL;
-	}
+	if (unlikely(dm_rq_is_flush_request(rq)))
+		return BLKPREP_OK;
 
 	if (unlikely(rq->special)) {
 		DMWARN("Already has something in rq->special.");
 		return BLKPREP_KILL;
 	}
 
-	tio = alloc_rq_tio(md); /* Only one for each original request */
-	if (!tio)
-		/* -ENOMEM */
+	clone = clone_rq(rq, md, GFP_ATOMIC);
+	if (!clone)
 		return BLKPREP_DEFER;
 
-	tio->md = md;
-	tio->ti = NULL;
-	tio->orig = rq;
-	tio->error = 0;
-	memset(&tio->info, 0, sizeof(tio->info));
-
-	clone = &tio->clone;
-	if (setup_clone(clone, rq, tio)) {
-		/* -ENOMEM */
-		free_rq_tio(tio);
-		return BLKPREP_DEFER;
-	}
-
 	rq->special = clone;
 	rq->cmd_flags |= REQ_DONTPREP;
 
 	return BLKPREP_OK;
 }
 
-static void map_request(struct dm_target *ti, struct request *rq,
-			struct mapped_device *md)
+/*
+ * Returns:
+ * 0  : the request has been processed (not requeued)
+ * !0 : the request has been requeued
+ */
+static int map_request(struct dm_target *ti, struct request *clone,
+		       struct mapped_device *md)
 {
-	int r;
-	struct request *clone = rq->special;
+	int r, requeued = 0;
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
 	/*
@@ -1511,11 +1623,14 @@ static void map_request(struct dm_target *ti, struct request *rq,
 		break;
 	case DM_MAPIO_REMAPPED:
 		/* The target has remapped the I/O so dispatch it */
+		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
+				     blk_rq_pos(tio->orig));
 		dm_dispatch_request(clone);
 		break;
 	case DM_MAPIO_REQUEUE:
 		/* The target wants to requeue the I/O */
 		dm_requeue_unmapped_request(clone);
+		requeued = 1;
 		break;
 	default:
 		if (r > 0) {
@@ -1527,6 +1642,8 @@ static void map_request(struct dm_target *ti, struct request *rq,
 		dm_kill_unmapped_request(clone, r);
 		break;
 	}
+
+	return requeued;
 }
 
 /*
@@ -1536,29 +1653,26 @@ static void map_request(struct dm_target *ti, struct request *rq,
 static void dm_request_fn(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *ti;
-	struct request *rq;
+	struct request *rq, *clone;
 
 	/*
-	 * For noflush suspend, check blk_queue_stopped() to immediately
-	 * quit I/O dispatching.
+	 * For suspend, check blk_queue_stopped() and increment
+	 * ->pending within a single queue_lock not to increment the
+	 * number of in-flight I/Os after the queue is stopped in
+	 * dm_suspend().
 	 */
 	while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
 		rq = blk_peek_request(q);
 		if (!rq)
 			goto plug_and_out;
 
-		if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend maker */
-			if (queue_in_flight(q))
-				/* Not quiet yet. Wait more */
-				goto plug_and_out;
-
-			/* This device should be quiet now */
-			__stop_queue(q);
+		if (unlikely(dm_rq_is_flush_request(rq))) {
+			BUG_ON(md->flush_request);
+			md->flush_request = rq;
 			blk_start_request(rq);
-			__blk_end_request_all(rq, 0);
-			wake_up(&md->wait);
+			queue_work(md->wq, &md->barrier_work);
 			goto out;
 		}
 
@@ -1567,13 +1681,21 @@ static void dm_request_fn(struct request_queue *q)
 			goto plug_and_out;
 
 		blk_start_request(rq);
+		clone = rq->special;
+		atomic_inc(&md->pending[rq_data_dir(clone)]);
+
 		spin_unlock(q->queue_lock);
-		map_request(ti, rq, md);
+		if (map_request(ti, clone, md))
+			goto requeued;
+
 		spin_lock_irq(q->queue_lock);
 	}
 
 	goto out;
 
+requeued:
+	spin_lock_irq(q->queue_lock);
+
 plug_and_out:
 	if (!elv_queue_empty(q))
 		/* Some requests still remain, retry later */
@@ -1595,7 +1717,7 @@ static int dm_lld_busy(struct request_queue *q)
 {
 	int r;
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 
 	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
 		r = 1;
@@ -1610,7 +1732,7 @@ static int dm_lld_busy(struct request_queue *q)
 static void dm_unplug_all(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
+	struct dm_table *map = dm_get_live_table(md);
 
 	if (map) {
 		if (dm_request_based(md))
@@ -1628,7 +1750,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	struct dm_table *map;
 
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-		map = dm_get_table(md);
+		map = dm_get_live_table(md);
 		if (map) {
 			/*
 			 * Request-based dm cares about only own queue for
@@ -1725,6 +1847,7 @@ out:
 static const struct block_device_operations dm_blk_dops;
 
 static void dm_wq_work(struct work_struct *work);
+static void dm_rq_barrier_work(struct work_struct *work);
 
 /*
 * Allocate and initialise a blank device with a given minor.
@@ -1754,6 +1877,7 @@ static struct mapped_device *alloc_dev(int minor)
 	init_rwsem(&md->io_lock);
 	mutex_init(&md->suspend_lock);
 	spin_lock_init(&md->deferred_lock);
+	spin_lock_init(&md->barrier_error_lock);
 	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
@@ -1788,6 +1912,8 @@ static struct mapped_device *alloc_dev(int minor)
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 	blk_queue_lld_busy(md->queue, dm_lld_busy);
+	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
+			  dm_rq_prepare_flush);
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
@@ -1797,6 +1923,7 @@ static struct mapped_device *alloc_dev(int minor)
 	atomic_set(&md->pending[1], 0);
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
+	INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
 	init_waitqueue_head(&md->eventq);
 
 	md->disk->major = _major;
@@ -1921,9 +2048,13 @@ static void __set_size(struct mapped_device *md, sector_t size)
 	mutex_unlock(&md->bdev->bd_inode->i_mutex);
 }
 
-static int __bind(struct mapped_device *md, struct dm_table *t,
-		  struct queue_limits *limits)
+/*
+ * Returns old map, which caller must destroy.
+ */
+static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
+			       struct queue_limits *limits)
 {
+	struct dm_table *old_map;
 	struct request_queue *q = md->queue;
 	sector_t size;
 	unsigned long flags;
@@ -1938,11 +2069,6 @@ static int __bind(struct mapped_device *md, struct dm_table *t,
 
 	__set_size(md, size);
 
-	if (!size) {
-		dm_table_destroy(t);
-		return 0;
-	}
-
 	dm_table_event_callback(t, event_callback, md);
 
 	/*
@@ -1958,26 +2084,31 @@ static int __bind(struct mapped_device *md, struct dm_table *t,
 	__bind_mempools(md, t);
 
 	write_lock_irqsave(&md->map_lock, flags);
+	old_map = md->map;
 	md->map = t;
 	dm_table_set_restrictions(t, q, limits);
 	write_unlock_irqrestore(&md->map_lock, flags);
 
-	return 0;
+	return old_map;
 }
 
-static void __unbind(struct mapped_device *md)
+/*
+ * Returns unbound table for the caller to free.
+ */
+static struct dm_table *__unbind(struct mapped_device *md)
 {
 	struct dm_table *map = md->map;
 	unsigned long flags;
 
 	if (!map)
-		return;
+		return NULL;
 
 	dm_table_event_callback(map, NULL, NULL);
 	write_lock_irqsave(&md->map_lock, flags);
 	md->map = NULL;
 	write_unlock_irqrestore(&md->map_lock, flags);
-	dm_table_destroy(map);
+
+	return map;
 }
 
 /*
@@ -2059,18 +2190,18 @@ void dm_put(struct mapped_device *md)
 	BUG_ON(test_bit(DMF_FREEING, &md->flags));
 
 	if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
-		map = dm_get_table(md);
+		map = dm_get_live_table(md);
 		idr_replace(&_minor_idr, MINOR_ALLOCED,
 			    MINOR(disk_devt(dm_disk(md))));
 		set_bit(DMF_FREEING, &md->flags);
 		spin_unlock(&_minor_lock);
-		if (!dm_suspended(md)) {
+		if (!dm_suspended_md(md)) {
 			dm_table_presuspend_targets(map);
 			dm_table_postsuspend_targets(map);
 		}
 		dm_sysfs_exit(md);
 		dm_table_put(map);
-		__unbind(md);
+		dm_table_destroy(__unbind(md));
 		free_dev(md);
 	}
 }
@@ -2080,8 +2211,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 {
 	int r = 0;
 	DECLARE_WAITQUEUE(wait, current);
-	struct request_queue *q = md->queue;
-	unsigned long flags;
 
 	dm_unplug_all(md->queue);
 
@@ -2091,15 +2220,7 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 		set_current_state(interruptible);
 
 		smp_mb();
-		if (dm_request_based(md)) {
-			spin_lock_irqsave(q->queue_lock, flags);
-			if (!queue_in_flight(q) && blk_queue_stopped(q)) {
-				spin_unlock_irqrestore(q->queue_lock, flags);
-				break;
-			}
-			spin_unlock_irqrestore(q->queue_lock, flags);
-		} else if (!atomic_read(&md->pending[0]) &&
-			   !atomic_read(&md->pending[1]))
+		if (!md_in_flight(md))
 			break;
 
 		if (interruptible == TASK_INTERRUPTIBLE &&
@@ -2194,98 +2315,106 @@ static void dm_queue_flush(struct mapped_device *md)
 	queue_work(md->wq, &md->work);
 }
 
-/*
- * Swap in a new table (destroying old one).
- */
-int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
 {
-	struct queue_limits limits;
-	int r = -EINVAL;
+	struct dm_rq_target_io *tio = clone->end_io_data;
 
-	mutex_lock(&md->suspend_lock);
+	tio->info.flush_request = flush_nr;
+}
 
-	/* device must be suspended */
-	if (!dm_suspended(md))
-		goto out;
+/* Issue barrier requests to targets and wait for their completion. */
+static int dm_rq_barrier(struct mapped_device *md)
+{
+	int i, j;
+	struct dm_table *map = dm_get_live_table(md);
+	unsigned num_targets = dm_table_get_num_targets(map);
+	struct dm_target *ti;
+	struct request *clone;
 
-	r = dm_calculate_queue_limits(table, &limits);
-	if (r)
-		goto out;
+	md->barrier_error = 0;
 
-	/* cannot change the device type, once a table is bound */
-	if (md->map &&
-	    (dm_table_get_type(md->map) != dm_table_get_type(table))) {
-		DMWARN("can't change the device type after a table is bound");
-		goto out;
+	for (i = 0; i < num_targets; i++) {
+		ti = dm_table_get_target(map, i);
+		for (j = 0; j < ti->num_flush_requests; j++) {
+			clone = clone_rq(md->flush_request, md, GFP_NOIO);
+			dm_rq_set_flush_nr(clone, j);
+			atomic_inc(&md->pending[rq_data_dir(clone)]);
+			map_request(ti, clone, md);
+		}
 	}
 
-	__unbind(md);
-	r = __bind(md, table, &limits);
-
-out:
-	mutex_unlock(&md->suspend_lock);
-	return r;
-}
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+	dm_table_put(map);
 
-static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
-{
-	md->suspend_rq.special = (void *)0x1;
+	return md->barrier_error;
 }
 
-static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
+static void dm_rq_barrier_work(struct work_struct *work)
 {
+	int error;
+	struct mapped_device *md = container_of(work, struct mapped_device,
+						barrier_work);
 	struct request_queue *q = md->queue;
+	struct request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (!noflush)
-		dm_rq_invalidate_suspend_marker(md);
-	__start_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
+	/*
+	 * Hold the md reference here and leave it at the last part so that
+	 * the md can't be deleted by device opener when the barrier request
+	 * completes.
+	 */
+	dm_get(md);
 
-static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
-{
-	struct request *rq = &md->suspend_rq;
-	struct request_queue *q = md->queue;
+	error = dm_rq_barrier(md);
 
-	if (noflush)
-		stop_queue(q);
-	else {
-		blk_rq_init(q, rq);
-		blk_insert_request(q, rq, 0, NULL);
-	}
+	rq = md->flush_request;
+	md->flush_request = NULL;
+
+	if (error == DM_ENDIO_REQUEUE) {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_requeue_request(q, rq);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	} else
+		blk_end_request_all(rq, error);
+
+	blk_run_queue(q);
+
+	dm_put(md);
 }
 
-static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
+/*
+ * Swap in a new table, returning the old one for the caller to destroy.
+ */
+struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
-	int r = 1;
-	struct request *rq = &md->suspend_rq;
-	struct request_queue *q = md->queue;
-	unsigned long flags;
+	struct dm_table *map = ERR_PTR(-EINVAL);
+	struct queue_limits limits;
+	int r;
 
-	if (noflush)
-		return r;
+	mutex_lock(&md->suspend_lock);
 
-	/* The marker must be protected by queue lock if it is in use */
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (unlikely(rq->ref_count)) {
-		/*
-		 * This can happen, when the previous flush suspend was
-		 * interrupted, the marker is still in the queue and
-		 * this flush suspend has been invoked, because we don't
-		 * remove the marker at the time of suspend interruption.
-		 * We have only one marker per mapped_device, so we can't
-		 * start another flush suspend while it is in use.
-		 */
-		BUG_ON(!rq->special); /* The marker should be invalidated */
-		DMWARN("Invalidating the previous flush suspend is still in"
-		       " progress. Please retry later.");
-		r = 0;
+	/* device must be suspended */
+	if (!dm_suspended_md(md))
+		goto out;
+
+	r = dm_calculate_queue_limits(table, &limits);
+	if (r) {
+		map = ERR_PTR(r);
+		goto out;
 	}
-	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	return r;
+	/* cannot change the device type, once a table is bound */
+	if (md->map &&
+	    (dm_table_get_type(md->map) != dm_table_get_type(table))) {
+		DMWARN("can't change the device type after a table is bound");
+		goto out;
+	}
+
+	map = __bind(md, table, &limits);
+
+out:
+	mutex_unlock(&md->suspend_lock);
+	return map;
 }
 
 /*
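For reference, dm_swap_table() above now hands the displaced table back to the caller instead of returning an int, and reports failure through ERR_PTR(). A caller would therefore look roughly like the sketch below; this is an illustration, not code from the patch, and new_map/old_map are hypothetical names.

	struct dm_table *old_map;

	old_map = dm_swap_table(md, new_map);
	if (IS_ERR(old_map))
		return PTR_ERR(old_map);	/* e.g. -EINVAL if md is not suspended */

	/* The caller, not dm_swap_table(), frees whatever table was replaced. */
	dm_table_destroy(old_map);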
@@ -2330,49 +2459,11 @@ static void unlock_fs(struct mapped_device *md)
 /*
 * Suspend mechanism in request-based dm.
 *
- * After the suspend starts, further incoming requests are kept in
- * the request_queue and deferred.
- * Remaining requests in the request_queue at the start of suspend are flushed
- * if it is flush suspend.
- * The suspend completes when the following conditions have been satisfied,
- * so wait for it:
- * 1. q->in_flight is 0 (which means no in_flight request)
- * 2. queue has been stopped (which means no request dispatching)
+ * 1. Flush all I/Os by lock_fs() if needed.
+ * 2. Stop dispatching any I/O by stopping the request_queue.
+ * 3. Wait for all in-flight I/Os to be completed or requeued.
 *
- *
- * Noflush suspend
- * ---------------
- * Noflush suspend doesn't need to dispatch remaining requests.
- * So stop the queue immediately. Then, wait for all in_flight requests
- * to be completed or requeued.
- *
- * To abort noflush suspend, start the queue.
- *
- *
- * Flush suspend
- * -------------
- * Flush suspend needs to dispatch remaining requests. So stop the queue
- * after the remaining requests are completed. (Requeued request must be also
- * re-dispatched and completed. Until then, we can't stop the queue.)
- *
- * During flushing the remaining requests, further incoming requests are also
- * inserted to the same queue. To distinguish which requests are to be
- * flushed, we insert a marker request to the queue at the time of starting
- * flush suspend, like a barrier.
- * The dispatching is blocked when the marker is found on the top of the queue.
- * And the queue is stopped when all in_flight requests are completed, since
- * that means the remaining requests are completely flushed.
- * Then, the marker is removed from the queue.
- *
- * To abort flush suspend, we also need to take care of the marker, not only
- * starting the queue.
- * We don't remove the marker forcibly from the queue since it's against
- * the block-layer manner. Instead, we put a invalidated mark on the marker.
- * When the invalidated marker is found on the top of the queue, it is
- * immediately removed from the queue, so it doesn't block dispatching.
- * Because we have only one marker per mapped_device, we can't start another
- * flush suspend until the invalidated marker is removed from the queue.
- * So fail and return with -EBUSY in such a case.
+ * To abort suspend, start the request_queue.
 */
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
@@ -2383,17 +2474,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 	mutex_lock(&md->suspend_lock);
 
-	if (dm_suspended(md)) {
+	if (dm_suspended_md(md)) {
 		r = -EINVAL;
 		goto out_unlock;
 	}
 
-	if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
-		r = -EBUSY;
-		goto out_unlock;
-	}
-
-	map = dm_get_table(md);
+	map = dm_get_live_table(md);
 
 	/*
 	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2406,8 +2492,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	dm_table_presuspend_targets(map);
 
 	/*
-	 * Flush I/O to the device. noflush supersedes do_lockfs,
-	 * because lock_fs() needs to flush I/Os.
+	 * Flush I/O to the device.
+	 * Any I/O submitted after lock_fs() may not be flushed.
+	 * noflush takes precedence over do_lockfs.
+	 * (lock_fs() flushes I/Os and waits for them to complete.)
 	 */
 	if (!noflush && do_lockfs) {
 		r = lock_fs(md);
@@ -2436,10 +2524,15 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
 	up_write(&md->io_lock);
 
-	flush_workqueue(md->wq);
-
+	/*
+	 * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
+	 * can be kicked until md->queue is stopped. So stop md->queue before
+	 * flushing md->wq.
+	 */
 	if (dm_request_based(md))
-		dm_rq_start_suspend(md, noflush);
+		stop_queue(md->queue);
+
+	flush_workqueue(md->wq);
 
 	/*
 	 * At this point no more requests are entering target request routines.
@@ -2458,7 +2551,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		dm_queue_flush(md);
 
 		if (dm_request_based(md))
-			dm_rq_abort_suspend(md, noflush);
+			start_queue(md->queue);
 
 		unlock_fs(md);
 		goto out; /* pushback list is already flushed, so skip flush */
@@ -2470,10 +2563,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	 * requests are being added to md->deferred list.
 	 */
 
-	dm_table_postsuspend_targets(map);
-
 	set_bit(DMF_SUSPENDED, &md->flags);
 
+	dm_table_postsuspend_targets(map);
+
 out:
 	dm_table_put(map);
 
@@ -2488,10 +2581,10 @@ int dm_resume(struct mapped_device *md)
 	struct dm_table *map = NULL;
 
 	mutex_lock(&md->suspend_lock);
-	if (!dm_suspended(md))
+	if (!dm_suspended_md(md))
 		goto out;
 
-	map = dm_get_table(md);
+	map = dm_get_live_table(md);
 	if (!map || !dm_table_get_size(map))
 		goto out;
 
@@ -2525,18 +2618,19 @@ out:
 /*-----------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------*/
-void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
 		       unsigned cookie)
 {
 	char udev_cookie[DM_COOKIE_LENGTH];
 	char *envp[] = { udev_cookie, NULL };
 
 	if (!cookie)
-		kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+		return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
 	else {
 		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
 			 DM_COOKIE_ENV_VAR_NAME, cookie);
-		kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
+		return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
+					  action, envp);
 	}
 }
 
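dm_kobject_uevent() above now returns the result of kobject_uevent()/kobject_uevent_env() instead of void, so a caller can log or propagate a failure. A hypothetical caller might do something like the following; the cookie variable is made up for the example:

	int r = dm_kobject_uevent(md, KOBJ_CHANGE, cookie);

	if (r)
		DMWARN("could not send KOBJ_CHANGE uevent: %d", r);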
@@ -2592,26 +2686,27 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
 		return NULL;
 
 	if (test_bit(DMF_FREEING, &md->flags) ||
-	    test_bit(DMF_DELETING, &md->flags))
+	    dm_deleting_md(md))
 		return NULL;
 
 	dm_get(md);
 	return md;
 }
 
-int dm_suspended(struct mapped_device *md)
+int dm_suspended_md(struct mapped_device *md)
 {
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
-int dm_noflush_suspending(struct dm_target *ti)
+int dm_suspended(struct dm_target *ti)
 {
-	struct mapped_device *md = dm_table_get_md(ti->table);
-	int r = __noflush_suspending(md);
-
-	dm_put(md);
+	return dm_suspended_md(dm_table_get_md(ti->table));
+}
+EXPORT_SYMBOL_GPL(dm_suspended);
 
-	return r;
+int dm_noflush_suspending(struct dm_target *ti)
+{
+	return __noflush_suspending(dm_table_get_md(ti->table));
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
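The helpers reworked at the end of this hunk now take only a dm_target and no longer need a dm_get()/dm_put() pair around the check. As a rough, hypothetical illustration (not code from the patch), a request-based target's map function could use dm_noflush_suspending() like this; example_map_rq and example_path_available are made-up names:

static bool example_path_available(struct dm_target *ti);	/* made-up helper */

static int example_map_rq(struct dm_target *ti, struct request *clone,
			  union map_info *map_context)
{
	/*
	 * Hypothetical policy, loosely modelled on multipath behaviour:
	 * if no path is usable, fail the I/O unless a noflush suspend is
	 * in progress, in which case push it back to be retried later.
	 */
	if (!example_path_available(ti))
		return dm_noflush_suspending(ti) ? DM_MAPIO_REQUEUE : -EIO;

	return DM_MAPIO_REMAPPED;
}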