 drivers/md/dm-table.c         |  14
 drivers/md/dm.c               | 705
 drivers/md/dm.h               |   1
 include/linux/device-mapper.h |   9
 4 files changed, 725 insertions(+), 4 deletions(-)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 09a57113955..c5f784419f2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1080,6 +1080,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
 	return r;
 }
 
+int dm_table_any_busy_target(struct dm_table *t)
+{
+	unsigned i;
+	struct dm_target *ti;
+
+	for (i = 0; i < t->num_targets; i++) {
+		ti = t->targets + i;
+		if (ti->type->busy && ti->type->busy(ti))
+			return 1;
+	}
+
+	return 0;
+}
+
 void dm_table_unplug_all(struct dm_table *t)
 {
 	struct dm_dev_internal *dd;
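
The new dm_table_any_busy_target() above simply polls each target's optional ->busy hook. For illustration only, a hypothetical request-based target could implement that hook roughly as in the following sketch; the target structure and names are assumptions, while the ->busy callback signature and dm_underlying_device_busy() (exported later in this patch) come from the patch itself.

#include <linux/blkdev.h>
#include <linux/device-mapper.h>

/* Sketch only: hypothetical per-target data for a single-device target. */
struct example_rq_target {
	struct dm_dev *dev;	/* sole underlying device (assumption) */
};

static int example_busy(struct dm_target *ti)
{
	struct example_rq_target *ert = ti->private;

	/*
	 * Report busy while the underlying queue cannot accept more work,
	 * so that dm_request_fn() plugs the dm queue instead of dispatching.
	 */
	return dm_underlying_device_busy(bdev_get_queue(ert->dev->bdev));
}
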
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f609793a92d..be003e5fea3 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -78,7 +78,7 @@ struct dm_rq_target_io {
  */
 struct dm_rq_clone_bio_info {
 	struct bio *orig;
-	struct request *rq;
+	struct dm_rq_target_io *tio;
 };
 
 union map_info *dm_get_mapinfo(struct bio *bio)
@@ -88,6 +88,14 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 	return NULL;
 }
 
+union map_info *dm_get_rq_mapinfo(struct request *rq)
+{
+	if (rq && rq->end_io_data)
+		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
+
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
@@ -169,6 +177,12 @@ struct mapped_device {
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
+	/* marker of flush suspend for request-based dm */
+	struct request suspend_rq;
+
+	/* For saving the address of __make_request for request based dm */
+	make_request_fn *saved_make_request_fn;
+
 	/* sysfs handle */
 	struct kobject kobj;
 
@@ -406,6 +420,26 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 	mempool_free(tio, md->tio_pool);
 }
 
+static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
+{
+	return mempool_alloc(md->tio_pool, GFP_ATOMIC);
+}
+
+static void free_rq_tio(struct dm_rq_target_io *tio)
+{
+	mempool_free(tio, tio->md->tio_pool);
+}
+
+static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
+{
+	return mempool_alloc(md->io_pool, GFP_ATOMIC);
+}
+
+static void free_bio_info(struct dm_rq_clone_bio_info *info)
+{
+	mempool_free(info, info->tio->md->io_pool);
+}
+
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
@@ -615,6 +649,262 @@ static void clone_endio(struct bio *bio, int error)
 	dec_pending(io, error);
 }
 
+/*
+ * Partial completion handling for request-based dm
+ */
+static void end_clone_bio(struct bio *clone, int error)
+{
+	struct dm_rq_clone_bio_info *info = clone->bi_private;
+	struct dm_rq_target_io *tio = info->tio;
+	struct bio *bio = info->orig;
+	unsigned int nr_bytes = info->orig->bi_size;
+
+	bio_put(clone);
+
+	if (tio->error)
+		/*
+		 * An error has already been detected on the request.
+		 * Once error occurred, just let clone->end_io() handle
+		 * the remainder.
+		 */
+		return;
+	else if (error) {
+		/*
+		 * Don't notice the error to the upper layer yet.
+		 * The error handling decision is made by the target driver,
+		 * when the request is completed.
+		 */
+		tio->error = error;
+		return;
+	}
+
+	/*
+	 * I/O for the bio successfully completed.
+	 * Notice the data completion to the upper layer.
+	 */
+
+	/*
+	 * bios are processed from the head of the list.
+	 * So the completing bio should always be rq->bio.
+	 * If it's not, something wrong is happening.
+	 */
+	if (tio->orig->bio != bio)
+		DMERR("bio completion is going in the middle of the request");
+
+	/*
+	 * Update the original request.
+	 * Do not use blk_end_request() here, because it may complete
+	 * the original request before the clone, and break the ordering.
+	 */
+	blk_update_request(tio->orig, 0, nr_bytes);
+}
+
+/*
+ * Don't touch any member of the md after calling this function because
+ * the md may be freed in dm_put() at the end of this function.
+ * Or do dm_get() before calling this function and dm_put() later.
+ */
+static void rq_completed(struct mapped_device *md, int run_queue)
+{
+	int wakeup_waiters = 0;
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!queue_in_flight(q))
+		wakeup_waiters = 1;
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	/* nudge anyone waiting on suspend queue */
+	if (wakeup_waiters)
+		wake_up(&md->wait);
+
+	if (run_queue)
+		blk_run_queue(q);
+
+	/*
+	 * dm_put() must be at the end of this function. See the comment above
+	 */
+	dm_put(md);
+}
+
+static void dm_unprep_request(struct request *rq)
+{
+	struct request *clone = rq->special;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+
+	rq->special = NULL;
+	rq->cmd_flags &= ~REQ_DONTPREP;
+
+	blk_rq_unprep_clone(clone);
+	free_rq_tio(tio);
+}
+
+/*
+ * Requeue the original request of a clone.
+ */
+void dm_requeue_unmapped_request(struct request *clone)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
+	struct request *rq = tio->orig;
+	struct request_queue *q = rq->q;
+	unsigned long flags;
+
+	dm_unprep_request(rq);
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (elv_queue_empty(q))
+		blk_plug_device(q);
+	blk_requeue_request(q, rq);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	rq_completed(md, 0);
+}
+EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
+
+static void __stop_queue(struct request_queue *q)
+{
+	blk_stop_queue(q);
+}
+
+static void stop_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__stop_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void __start_queue(struct request_queue *q)
+{
+	if (blk_queue_stopped(q))
+		blk_start_queue(q);
+}
+
+static void start_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Complete the clone and the original request.
+ * Must be called without queue lock.
+ */
+static void dm_end_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
+	struct request *rq = tio->orig;
+
+	if (blk_pc_request(rq)) {
+		rq->errors = clone->errors;
+		rq->resid_len = clone->resid_len;
+
+		if (rq->sense)
+			/*
+			 * We are using the sense buffer of the original
+			 * request.
+			 * So setting the length of the sense data is enough.
+			 */
+			rq->sense_len = clone->sense_len;
+	}
+
+	BUG_ON(clone->bio);
+	free_rq_tio(tio);
+
+	blk_end_request_all(rq, error);
+
+	rq_completed(md, 1);
+}
+
+/*
+ * Request completion handler for request-based dm
+ */
+static void dm_softirq_done(struct request *rq)
+{
+	struct request *clone = rq->completion_data;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+	int error = tio->error;
+
+	if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io)
+		error = rq_end_io(tio->ti, clone, error, &tio->info);
+
+	if (error <= 0)
+		/* The target wants to complete the I/O */
+		dm_end_request(clone, error);
+	else if (error == DM_ENDIO_INCOMPLETE)
+		/* The target will handle the I/O */
+		return;
+	else if (error == DM_ENDIO_REQUEUE)
+		/* The target wants to requeue the I/O */
+		dm_requeue_unmapped_request(clone);
+	else {
+		DMWARN("unimplemented target endio return value: %d", error);
+		BUG();
+	}
+}
+
+/*
+ * Complete the clone and the original request with the error status
+ * through softirq context.
+ */
+static void dm_complete_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct request *rq = tio->orig;
+
+	tio->error = error;
+	rq->completion_data = clone;
+	blk_complete_request(rq);
+}
+
+/*
+ * Complete the not-mapped clone and the original request with the error status
+ * through softirq context.
+ * Target's rq_end_io() function isn't called.
+ * This may be used when the target's map_rq() function fails.
+ */
+void dm_kill_unmapped_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct request *rq = tio->orig;
+
+	rq->cmd_flags |= REQ_FAILED;
+	dm_complete_request(clone, error);
+}
+EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
+
+/*
+ * Called with the queue lock held
+ */
+static void end_clone_request(struct request *clone, int error)
+{
+	/*
+	 * For just cleaning up the information of the queue in which
+	 * the clone was dispatched.
+	 * The clone is *NOT* freed actually here because it is alloced from
+	 * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
+	 */
+	__blk_put_request(clone->q, clone);
+
+	/*
+	 * Actual request completion is done in a softirq context which doesn't
+	 * hold the queue lock. Otherwise, deadlock could occur because:
+	 *     - another request may be submitted by the upper level driver
+	 *       of the stacking during the completion
+	 *     - the submission which requires queue lock may be done
+	 *       against this queue
+	 */
+	dm_complete_request(clone, error);
+}
+
 static sector_t max_io_len(struct mapped_device *md,
 			   sector_t sector, struct dm_target *ti)
 {
@@ -998,7 +1288,7 @@ out:
  * The request function that just remaps the bio built up by
  * dm_merge_bvec.
  */
-static int dm_request(struct request_queue *q, struct bio *bio)
+static int _dm_request(struct request_queue *q, struct bio *bio)
 {
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
@@ -1035,12 +1325,274 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	return 0;
 }
 
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct mapped_device *md = q->queuedata;
+
+	if (unlikely(bio_barrier(bio))) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
+	}
+
+	return md->saved_make_request_fn(q, bio); /* call __make_request() */
+}
+
+static int dm_request_based(struct mapped_device *md)
+{
+	return blk_queue_stackable(md->queue);
+}
+
+static int dm_request(struct request_queue *q, struct bio *bio)
+{
+	struct mapped_device *md = q->queuedata;
+
+	if (dm_request_based(md))
+		return dm_make_request(q, bio);
+
+	return _dm_request(q, bio);
+}
+
+void dm_dispatch_request(struct request *rq)
+{
+	int r;
+
+	if (blk_queue_io_stat(rq->q))
+		rq->cmd_flags |= REQ_IO_STAT;
+
+	rq->start_time = jiffies;
+	r = blk_insert_cloned_request(rq->q, rq);
+	if (r)
+		dm_complete_request(rq, r);
+}
+EXPORT_SYMBOL_GPL(dm_dispatch_request);
+
+static void dm_rq_bio_destructor(struct bio *bio)
+{
+	struct dm_rq_clone_bio_info *info = bio->bi_private;
+	struct mapped_device *md = info->tio->md;
+
+	free_bio_info(info);
+	bio_free(bio, md->bs);
+}
+
+static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
+				 void *data)
+{
+	struct dm_rq_target_io *tio = data;
+	struct mapped_device *md = tio->md;
+	struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
+
+	if (!info)
+		return -ENOMEM;
+
+	info->orig = bio_orig;
+	info->tio = tio;
+	bio->bi_end_io = end_clone_bio;
+	bio->bi_private = info;
+	bio->bi_destructor = dm_rq_bio_destructor;
+
+	return 0;
+}
+
+static int setup_clone(struct request *clone, struct request *rq,
+		       struct dm_rq_target_io *tio)
+{
+	int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+				  dm_rq_bio_constructor, tio);
+
+	if (r)
+		return r;
+
+	clone->cmd = rq->cmd;
+	clone->cmd_len = rq->cmd_len;
+	clone->sense = rq->sense;
+	clone->buffer = rq->buffer;
+	clone->end_io = end_clone_request;
+	clone->end_io_data = tio;
+
+	return 0;
+}
+
+static int dm_rq_flush_suspending(struct mapped_device *md)
+{
+	return !md->suspend_rq.special;
+}
+
+/*
+ * Called with the queue lock held.
+ */
+static int dm_prep_fn(struct request_queue *q, struct request *rq)
+{
+	struct mapped_device *md = q->queuedata;
+	struct dm_rq_target_io *tio;
+	struct request *clone;
+
+	if (unlikely(rq == &md->suspend_rq)) {
+		if (dm_rq_flush_suspending(md))
+			return BLKPREP_OK;
+		else
+			/* The flush suspend was interrupted */
+			return BLKPREP_KILL;
+	}
+
+	if (unlikely(rq->special)) {
+		DMWARN("Already has something in rq->special.");
+		return BLKPREP_KILL;
+	}
+
+	tio = alloc_rq_tio(md); /* Only one for each original request */
+	if (!tio)
+		/* -ENOMEM */
+		return BLKPREP_DEFER;
+
+	tio->md = md;
+	tio->ti = NULL;
+	tio->orig = rq;
+	tio->error = 0;
+	memset(&tio->info, 0, sizeof(tio->info));
+
+	clone = &tio->clone;
+	if (setup_clone(clone, rq, tio)) {
+		/* -ENOMEM */
+		free_rq_tio(tio);
+		return BLKPREP_DEFER;
+	}
+
+	rq->special = clone;
+	rq->cmd_flags |= REQ_DONTPREP;
+
+	return BLKPREP_OK;
+}
+
+static void map_request(struct dm_target *ti, struct request *rq,
+			struct mapped_device *md)
+{
+	int r;
+	struct request *clone = rq->special;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+
+	/*
+	 * Hold the md reference here for the in-flight I/O.
+	 * We can't rely on the reference count by device opener,
+	 * because the device may be closed during the request completion
+	 * when all bios are completed.
+	 * See the comment in rq_completed() too.
+	 */
+	dm_get(md);
+
+	tio->ti = ti;
+	r = ti->type->map_rq(ti, clone, &tio->info);
+	switch (r) {
+	case DM_MAPIO_SUBMITTED:
+		/* The target has taken the I/O to submit by itself later */
+		break;
+	case DM_MAPIO_REMAPPED:
+		/* The target has remapped the I/O so dispatch it */
+		dm_dispatch_request(clone);
+		break;
+	case DM_MAPIO_REQUEUE:
+		/* The target wants to requeue the I/O */
+		dm_requeue_unmapped_request(clone);
+		break;
+	default:
+		if (r > 0) {
+			DMWARN("unimplemented target map return value: %d", r);
+			BUG();
+		}
+
+		/* The target wants to complete the I/O */
+		dm_kill_unmapped_request(clone, r);
+		break;
+	}
+}
+
+/*
+ * q->request_fn for request-based dm.
+ * Called with the queue lock held.
+ */
+static void dm_request_fn(struct request_queue *q)
+{
+	struct mapped_device *md = q->queuedata;
+	struct dm_table *map = dm_get_table(md);
+	struct dm_target *ti;
+	struct request *rq;
+
+	/*
+	 * For noflush suspend, check blk_queue_stopped() to immediately
+	 * quit I/O dispatching.
+	 */
+	while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+		rq = blk_peek_request(q);
+		if (!rq)
+			goto plug_and_out;
+
+		if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend maker */
+			if (queue_in_flight(q))
+				/* Not quiet yet. Wait more */
+				goto plug_and_out;
+
+			/* This device should be quiet now */
+			__stop_queue(q);
+			blk_start_request(rq);
+			__blk_end_request_all(rq, 0);
+			wake_up(&md->wait);
+			goto out;
+		}
+
+		ti = dm_table_find_target(map, blk_rq_pos(rq));
+		if (ti->type->busy && ti->type->busy(ti))
+			goto plug_and_out;
+
+		blk_start_request(rq);
+		spin_unlock(q->queue_lock);
+		map_request(ti, rq, md);
+		spin_lock_irq(q->queue_lock);
+	}
+
+	goto out;
+
+plug_and_out:
+	if (!elv_queue_empty(q))
+		/* Some requests still remain, retry later */
+		blk_plug_device(q);
+
+out:
+	dm_table_put(map);
+
+	return;
+}
+
+int dm_underlying_device_busy(struct request_queue *q)
+{
+	return blk_lld_busy(q);
+}
+EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
+
+static int dm_lld_busy(struct request_queue *q)
+{
+	int r;
+	struct mapped_device *md = q->queuedata;
+	struct dm_table *map = dm_get_table(md);
+
+	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
+		r = 1;
+	else
+		r = dm_table_any_busy_target(map);
+
+	dm_table_put(map);
+
+	return r;
+}
+
 static void dm_unplug_all(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
 	struct dm_table *map = dm_get_table(md);
 
 	if (map) {
+		if (dm_request_based(md))
+			generic_unplug_device(q);
+
 		dm_table_unplug_all(map);
 		dm_table_put(map);
 	}
@@ -1055,7 +1607,16 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
 		map = dm_get_table(md);
 		if (map) {
-			r = dm_table_any_congested(map, bdi_bits);
+			/*
+			 * Request-based dm cares about only own queue for
+			 * the query about congestion status of request_queue
+			 */
+			if (dm_request_based(md))
+				r = md->queue->backing_dev_info.state &
+				    bdi_bits;
+			else
+				r = dm_table_any_congested(map, bdi_bits);
+
 			dm_table_put(map);
 		}
 	}
@@ -1458,6 +2019,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 {
 	int r = 0;
 	DECLARE_WAITQUEUE(wait, current);
+	struct request_queue *q = md->queue;
+	unsigned long flags;
 
 	dm_unplug_all(md->queue);
 
@@ -1467,7 +2030,14 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 		set_current_state(interruptible);
 
 		smp_mb();
-		if (!atomic_read(&md->pending))
+		if (dm_request_based(md)) {
+			spin_lock_irqsave(q->queue_lock, flags);
+			if (!queue_in_flight(q) && blk_queue_stopped(q)) {
+				spin_unlock_irqrestore(q->queue_lock, flags);
+				break;
+			}
+			spin_unlock_irqrestore(q->queue_lock, flags);
+		} else if (!atomic_read(&md->pending))
 			break;
 
 		if (interruptible == TASK_INTERRUPTIBLE &&
@@ -1584,6 +2154,67 @@ out:
 	return r;
 }
 
+static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
+{
+	md->suspend_rq.special = (void *)0x1;
+}
+
+static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
+{
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!noflush)
+		dm_rq_invalidate_suspend_marker(md);
+	__start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
+{
+	struct request *rq = &md->suspend_rq;
+	struct request_queue *q = md->queue;
+
+	if (noflush)
+		stop_queue(q);
+	else {
+		blk_rq_init(q, rq);
+		blk_insert_request(q, rq, 0, NULL);
+	}
+}
+
+static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
+{
+	int r = 1;
+	struct request *rq = &md->suspend_rq;
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
+	if (noflush)
+		return r;
+
+	/* The marker must be protected by queue lock if it is in use */
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (unlikely(rq->ref_count)) {
+		/*
+		 * This can happen, when the previous flush suspend was
+		 * interrupted, the marker is still in the queue and
+		 * this flush suspend has been invoked, because we don't
+		 * remove the marker at the time of suspend interruption.
+		 * We have only one marker per mapped_device, so we can't
+		 * start another flush suspend while it is in use.
+		 */
+		BUG_ON(!rq->special); /* The marker should be invalidated */
+		DMWARN("Invalidating the previous flush suspend is still in"
+		       " progress. Please retry later.");
+		r = 0;
+	}
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return r;
+}
+
 /*
  * Functions to lock and unlock any filesystem running on the
  * device.
@@ -1623,6 +2254,53 @@ static void unlock_fs(struct mapped_device *md)
  * dm_bind_table, dm_suspend must be called to flush any in
  * flight bios and ensure that any further io gets deferred.
  */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * After the suspend starts, further incoming requests are kept in
+ * the request_queue and deferred.
+ * Remaining requests in the request_queue at the start of suspend are flushed
+ * if it is flush suspend.
+ * The suspend completes when the following conditions have been satisfied,
+ * so wait for it:
+ *    1. q->in_flight is 0 (which means no in_flight request)
+ *    2. queue has been stopped (which means no request dispatching)
+ *
+ *
+ * Noflush suspend
+ * ---------------
+ * Noflush suspend doesn't need to dispatch remaining requests.
+ * So stop the queue immediately. Then, wait for all in_flight requests
+ * to be completed or requeued.
+ *
+ * To abort noflush suspend, start the queue.
+ *
+ *
+ * Flush suspend
+ * -------------
+ * Flush suspend needs to dispatch remaining requests. So stop the queue
+ * after the remaining requests are completed. (Requeued request must be also
+ * re-dispatched and completed. Until then, we can't stop the queue.)
+ *
+ * During flushing the remaining requests, further incoming requests are also
+ * inserted to the same queue. To distinguish which requests are to be
+ * flushed, we insert a marker request to the queue at the time of starting
+ * flush suspend, like a barrier.
+ * The dispatching is blocked when the marker is found on the top of the queue.
+ * And the queue is stopped when all in_flight requests are completed, since
+ * that means the remaining requests are completely flushed.
+ * Then, the marker is removed from the queue.
+ *
+ * To abort flush suspend, we also need to take care of the marker, not only
+ * starting the queue.
+ * We don't remove the marker forcibly from the queue since it's against
+ * the block-layer manner. Instead, we put a invalidated mark on the marker.
+ * When the invalidated marker is found on the top of the queue, it is
+ * immediately removed from the queue, so it doesn't block dispatching.
+ * Because we have only one marker per mapped_device, we can't start another
+ * flush suspend until the invalidated marker is removed from the queue.
+ * So fail and return with -EBUSY in such a case.
+ */
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
 	struct dm_table *map = NULL;
@@ -1637,6 +2315,11 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		goto out_unlock;
 	}
 
+	if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
+		r = -EBUSY;
+		goto out_unlock;
+	}
+
 	map = dm_get_table(md);
 
 	/*
@@ -1682,6 +2365,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 	flush_workqueue(md->wq);
 
+	if (dm_request_based(md))
+		dm_rq_start_suspend(md, noflush);
+
 	/*
 	 * At this point no more requests are entering target request routines.
 	 * We call dm_wait_for_completion to wait for all existing requests
@@ -1698,6 +2384,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	if (r < 0) {
 		dm_queue_flush(md);
 
+		if (dm_request_based(md))
+			dm_rq_abort_suspend(md, noflush);
+
 		unlock_fs(md);
 		goto out; /* pushback list is already flushed, so skip flush */
 	}
@@ -1739,6 +2428,14 @@ int dm_resume(struct mapped_device *md)
 
 	dm_queue_flush(md);
 
+	/*
+	 * Flushing deferred I/Os must be done after targets are resumed
+	 * so that mapping of targets can work correctly.
+	 * Request-based dm is queueing the deferred I/Os in its request_queue.
+	 */
+	if (dm_request_based(md))
+		start_queue(md->queue);
+
 	unlock_fs(md);
 
 	clear_bit(DMF_SUSPENDED, &md->flags);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 604e85caadf..8dcabb1caff 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -50,6 +50,7 @@ void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
+int dm_table_any_busy_target(struct dm_table *t);
 
 /*
  * To check the return value from dm_table_find_target().
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e6bf3b8c7bf..0d6310657f3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -234,6 +234,7 @@ struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct mapped_device *md);
 int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
+union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 /*
  * Geometry functions.
@@ -396,4 +397,12 @@ static inline unsigned long to_bytes(sector_t n)
 	return (n << SECTOR_SHIFT);
 }
 
+/*-----------------------------------------------------------------
+ * Helper for block layer and dm core operations
+ *---------------------------------------------------------------*/
+void dm_dispatch_request(struct request *rq);
+void dm_requeue_unmapped_request(struct request *rq);
+void dm_kill_unmapped_request(struct request *rq, int error);
+int dm_underlying_device_busy(struct request_queue *q);
+
 #endif /* _LINUX_DEVICE_MAPPER_H */
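
The declarations above form the interface a request-based target uses once its map_rq hook runs. As a sketch only, assuming a hypothetical single-device pass-through target (the target name, its private structure, and the single-device layout are illustrative, not from this patch), a map_rq implementation could look like:

/* Sketch only: hypothetical per-target data, as in the earlier example. */
struct example_rq_target {
	struct dm_dev *dev;	/* sole underlying device (assumption) */
};

static int example_map_rq(struct dm_target *ti, struct request *clone,
			  union map_info *map_context)
{
	struct example_rq_target *ert = ti->private;
	struct block_device *bdev = ert->dev->bdev;

	/* Point the clone at the underlying device's queue. */
	clone->q = bdev_get_queue(bdev);
	clone->rq_disk = bdev->bd_disk;

	/* dm core then dispatches the clone via dm_dispatch_request(). */
	return DM_MAPIO_REMAPPED;
}

If the hook returned a negative errno instead, map_request() in dm.c would call dm_kill_unmapped_request() on the clone, completing the original request with that error; DM_MAPIO_REQUEUE would push the original request back onto the dm queue via dm_requeue_unmapped_request().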