Diffstat (limited to 'drivers/md')

 drivers/md/dm-table.c |  14 +
 drivers/md/dm.c       | 705 +++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/md/dm.h       |   1 +
 3 files changed, 716 insertions(+), 4 deletions(-)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 09a57113955e..c5f784419f23 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1080,6 +1080,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
         return r;
 }
 
+int dm_table_any_busy_target(struct dm_table *t)
+{
+        unsigned i;
+        struct dm_target *ti;
+
+        for (i = 0; i < t->num_targets; i++) {
+                ti = t->targets + i;
+                if (ti->type->busy && ti->type->busy(ti))
+                        return 1;
+        }
+
+        return 0;
+}
+
 void dm_table_unplug_all(struct dm_table *t)
 {
         struct dm_dev_internal *dd;
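dm_table_any_busy_target() simply polls each target's new .busy hook; what counts as "busy" is left entirely to the target driver (request-based dm is aimed primarily at dm-multipath). A minimal sketch of such a hook follows — the example_* names and the per-target state are hypothetical, not part of this patch:

/* Hypothetical target-side .busy hook, as polled by dm_table_any_busy_target(). */
static int example_busy(struct dm_target *ti)
{
        struct example_ctx *c = ti->private;    /* hypothetical per-target state */

        /* Non-zero means "don't dispatch more requests to me right now". */
        return c->no_usable_path || c->inflight > c->inflight_limit;
}

static struct target_type example_target = {
        .name    = "example",
        .version = {1, 0, 0},
        .module  = THIS_MODULE,
        .busy    = example_busy,
        /* .ctr, .dtr, .map_rq, .rq_end_io, ... */
};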
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f609793a92d0..be003e5fea3d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -78,7 +78,7 @@ struct dm_rq_target_io {
  */
 struct dm_rq_clone_bio_info {
         struct bio *orig;
-        struct request *rq;
+        struct dm_rq_target_io *tio;
 };
 
 union map_info *dm_get_mapinfo(struct bio *bio)
@@ -88,6 +88,14 @@ union map_info *dm_get_mapinfo(struct bio *bio)
         return NULL;
 }
 
+union map_info *dm_get_rq_mapinfo(struct request *rq)
+{
+        if (rq && rq->end_io_data)
+                return &((struct dm_rq_target_io *)rq->end_io_data)->info;
+        return NULL;
+}
+EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
+
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
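dm_get_rq_mapinfo() is exported so that a request-based target can recover, from a clone request alone, the per-request union map_info that core dm passes to its map_rq()/rq_end_io() hooks. A hedged usage sketch, assuming a target that stores its own per-I/O context in info->ptr (the example_* names are invented):

/* Hypothetical: recover per-request target context from a clone request. */
static void example_note_failure(struct request *clone)
{
        union map_info *info = dm_get_rq_mapinfo(clone);
        struct example_io *io = info ? info->ptr : NULL;    /* set by the target's map_rq() */

        if (io)
                io->failed = 1;        /* hypothetical bookkeeping */
}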
@@ -169,6 +177,12 @@ struct mapped_device {
         /* forced geometry settings */
         struct hd_geometry geometry;
 
+        /* marker of flush suspend for request-based dm */
+        struct request suspend_rq;
+
+        /* For saving the address of __make_request for request-based dm */
+        make_request_fn *saved_make_request_fn;
+
         /* sysfs handle */
         struct kobject kobj;
 
@@ -406,6 +420,26 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
         mempool_free(tio, md->tio_pool);
 }
 
+static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
+{
+        return mempool_alloc(md->tio_pool, GFP_ATOMIC);
+}
+
+static void free_rq_tio(struct dm_rq_target_io *tio)
+{
+        mempool_free(tio, tio->md->tio_pool);
+}
+
+static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
+{
+        return mempool_alloc(md->io_pool, GFP_ATOMIC);
+}
+
+static void free_bio_info(struct dm_rq_clone_bio_info *info)
+{
+        mempool_free(info, info->tio->md->io_pool);
+}
+
 static void start_io_acct(struct dm_io *io)
 {
         struct mapped_device *md = io->md;
@@ -615,6 +649,262 @@ static void clone_endio(struct bio *bio, int error)
         dec_pending(io, error);
 }
 
+/*
+ * Partial completion handling for request-based dm
+ */
+static void end_clone_bio(struct bio *clone, int error)
+{
+        struct dm_rq_clone_bio_info *info = clone->bi_private;
+        struct dm_rq_target_io *tio = info->tio;
+        struct bio *bio = info->orig;
+        unsigned int nr_bytes = info->orig->bi_size;
+
+        bio_put(clone);
+
+        if (tio->error)
+                /*
+                 * An error has already been detected on the request.
+                 * Once an error has occurred, just let clone->end_io()
+                 * handle the remainder.
+                 */
+                return;
+        else if (error) {
+                /*
+                 * Don't notify the upper layer of the error yet.
+                 * The error handling decision is made by the target driver
+                 * when the request is completed.
+                 */
+                tio->error = error;
+                return;
+        }
+
+        /*
+         * I/O for the bio successfully completed.
+         * Notify the upper layer of the completed data.
+         */
+
+        /*
+         * bios are processed from the head of the list.
+         * So the completing bio should always be rq->bio.
+         * If it isn't, something is wrong.
+         */
+        if (tio->orig->bio != bio)
+                DMERR("bio completion is going in the middle of the request");
+
+        /*
+         * Update the original request.
+         * Do not use blk_end_request() here, because it may complete
+         * the original request before the clone, and break the ordering.
+         */
+        blk_update_request(tio->orig, 0, nr_bytes);
+}
+
+/*
+ * Don't touch any member of the md after calling this function because
+ * the md may be freed in dm_put() at the end of this function.
+ * Or do dm_get() before calling this function and dm_put() later.
+ */
+static void rq_completed(struct mapped_device *md, int run_queue)
+{
+        int wakeup_waiters = 0;
+        struct request_queue *q = md->queue;
+        unsigned long flags;
+
+        spin_lock_irqsave(q->queue_lock, flags);
+        if (!queue_in_flight(q))
+                wakeup_waiters = 1;
+        spin_unlock_irqrestore(q->queue_lock, flags);
+
+        /* nudge anyone waiting on suspend queue */
+        if (wakeup_waiters)
+                wake_up(&md->wait);
+
+        if (run_queue)
+                blk_run_queue(q);
+
+        /*
+         * dm_put() must be at the end of this function. See the comment above.
+         */
+        dm_put(md);
+}
+
+static void dm_unprep_request(struct request *rq)
+{
+        struct request *clone = rq->special;
+        struct dm_rq_target_io *tio = clone->end_io_data;
+
+        rq->special = NULL;
+        rq->cmd_flags &= ~REQ_DONTPREP;
+
+        blk_rq_unprep_clone(clone);
+        free_rq_tio(tio);
+}
+
+/*
+ * Requeue the original request of a clone.
+ */
+void dm_requeue_unmapped_request(struct request *clone)
+{
+        struct dm_rq_target_io *tio = clone->end_io_data;
+        struct mapped_device *md = tio->md;
+        struct request *rq = tio->orig;
+        struct request_queue *q = rq->q;
+        unsigned long flags;
+
+        dm_unprep_request(rq);
+
+        spin_lock_irqsave(q->queue_lock, flags);
+        if (elv_queue_empty(q))
+                blk_plug_device(q);
+        blk_requeue_request(q, rq);
+        spin_unlock_irqrestore(q->queue_lock, flags);
+
+        rq_completed(md, 0);
+}
+EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
+
+static void __stop_queue(struct request_queue *q)
+{
+        blk_stop_queue(q);
+}
+
+static void stop_queue(struct request_queue *q)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(q->queue_lock, flags);
+        __stop_queue(q);
+        spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void __start_queue(struct request_queue *q)
+{
+        if (blk_queue_stopped(q))
+                blk_start_queue(q);
+}
+
+static void start_queue(struct request_queue *q)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(q->queue_lock, flags);
+        __start_queue(q);
+        spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Complete the clone and the original request.
+ * Must be called without queue lock.
+ */
+static void dm_end_request(struct request *clone, int error)
+{
+        struct dm_rq_target_io *tio = clone->end_io_data;
+        struct mapped_device *md = tio->md;
+        struct request *rq = tio->orig;
+
+        if (blk_pc_request(rq)) {
+                rq->errors = clone->errors;
+                rq->resid_len = clone->resid_len;
+
+                if (rq->sense)
+                        /*
+                         * We are using the sense buffer of the original
+                         * request.
+                         * So setting the length of the sense data is enough.
+                         */
+                        rq->sense_len = clone->sense_len;
+        }
+
+        BUG_ON(clone->bio);
+        free_rq_tio(tio);
+
+        blk_end_request_all(rq, error);
+
+        rq_completed(md, 1);
+}
+
+/*
+ * Request completion handler for request-based dm
+ */
+static void dm_softirq_done(struct request *rq)
+{
+        struct request *clone = rq->completion_data;
+        struct dm_rq_target_io *tio = clone->end_io_data;
+        dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+        int error = tio->error;
+
+        if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io)
+                error = rq_end_io(tio->ti, clone, error, &tio->info);
+
+        if (error <= 0)
+                /* The target wants to complete the I/O */
+                dm_end_request(clone, error);
+        else if (error == DM_ENDIO_INCOMPLETE)
+                /* The target will handle the I/O */
+                return;
+        else if (error == DM_ENDIO_REQUEUE)
+                /* The target wants to requeue the I/O */
+                dm_requeue_unmapped_request(clone);
+        else {
+                DMWARN("unimplemented target endio return value: %d", error);
+                BUG();
+        }
+}
+
+/*
+ * Complete the clone and the original request with the error status
+ * through softirq context.
+ */
+static void dm_complete_request(struct request *clone, int error)
+{
+        struct dm_rq_target_io *tio = clone->end_io_data;
+        struct request *rq = tio->orig;
+
+        tio->error = error;
+        rq->completion_data = clone;
+        blk_complete_request(rq);
+}
+
+/*
+ * Complete the not-mapped clone and the original request with the error status
+ * through softirq context.
+ * Target's rq_end_io() function isn't called.
+ * This may be used when the target's map_rq() function fails.
+ */
+void dm_kill_unmapped_request(struct request *clone, int error)
+{
+        struct dm_rq_target_io *tio = clone->end_io_data;
+        struct request *rq = tio->orig;
+
+        rq->cmd_flags |= REQ_FAILED;
+        dm_complete_request(clone, error);
+}
+EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
+
+/*
+ * Called with the queue lock held
+ */
+static void end_clone_request(struct request *clone, int error)
+{
+        /*
+         * This just cleans up the accounting of the queue in which
+         * the clone was dispatched.
+         * The clone is *NOT* actually freed here because it is allocated
+         * from dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
+         */
+        __blk_put_request(clone->q, clone);
+
+        /*
+         * Actual request completion is done in a softirq context which doesn't
+         * hold the queue lock.  Otherwise, deadlock could occur because:
+         *   - another request may be submitted by the upper level driver
+         *     of the stacking during the completion
+         *   - the submission which requires queue lock may be done
+         *     against this queue
+         */
+        dm_complete_request(clone, error);
+}
+
 static sector_t max_io_len(struct mapped_device *md,
                            sector_t sector, struct dm_target *ti)
 {
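The completion path added here runs in stages: end_clone_request() defers the work to softirq context via dm_complete_request(), dm_softirq_done() then consults the target's rq_end_io hook, and that hook's return value selects dm_end_request(), dm_requeue_unmapped_request(), or leaves completion to the target. A sketch of what such a hook might look like on the target side; example_error_is_retryable() and the surrounding names are hypothetical, not part of this patch:

/* Hypothetical rq_end_io hook, as consulted by dm_softirq_done(). */
static int example_rq_end_io(struct dm_target *ti, struct request *clone,
                             int error, union map_info *map_context)
{
        /*
         * <= 0                : complete the original request with this error
         * DM_ENDIO_REQUEUE    : requeue the original request
         * DM_ENDIO_INCOMPLETE : the target completes the request later itself
         */
        if (error && example_error_is_retryable(error))    /* hypothetical test */
                return DM_ENDIO_REQUEUE;

        return error;
}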
@@ -998,7 +1288,7 @@ out:
  * The request function that just remaps the bio built up by
  * dm_merge_bvec.
  */
-static int dm_request(struct request_queue *q, struct bio *bio)
+static int _dm_request(struct request_queue *q, struct bio *bio)
 {
         int rw = bio_data_dir(bio);
         struct mapped_device *md = q->queuedata;
@@ -1035,12 +1325,274 @@ static int dm_request(struct request_queue *q, struct bio *bio)
         return 0;
 }
 
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+        struct mapped_device *md = q->queuedata;
+
+        if (unlikely(bio_barrier(bio))) {
+                bio_endio(bio, -EOPNOTSUPP);
+                return 0;
+        }
+
+        return md->saved_make_request_fn(q, bio); /* call __make_request() */
+}
+
+static int dm_request_based(struct mapped_device *md)
+{
+        return blk_queue_stackable(md->queue);
+}
+
+static int dm_request(struct request_queue *q, struct bio *bio)
+{
+        struct mapped_device *md = q->queuedata;
+
+        if (dm_request_based(md))
+                return dm_make_request(q, bio);
+
+        return _dm_request(q, bio);
+}
+
+void dm_dispatch_request(struct request *rq)
+{
+        int r;
+
+        if (blk_queue_io_stat(rq->q))
+                rq->cmd_flags |= REQ_IO_STAT;
+
+        rq->start_time = jiffies;
+        r = blk_insert_cloned_request(rq->q, rq);
+        if (r)
+                dm_complete_request(rq, r);
+}
+EXPORT_SYMBOL_GPL(dm_dispatch_request);
+
+static void dm_rq_bio_destructor(struct bio *bio)
+{
+        struct dm_rq_clone_bio_info *info = bio->bi_private;
+        struct mapped_device *md = info->tio->md;
+
+        free_bio_info(info);
+        bio_free(bio, md->bs);
+}
+
+static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
+                                 void *data)
+{
+        struct dm_rq_target_io *tio = data;
+        struct mapped_device *md = tio->md;
+        struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
+
+        if (!info)
+                return -ENOMEM;
+
+        info->orig = bio_orig;
+        info->tio = tio;
+        bio->bi_end_io = end_clone_bio;
+        bio->bi_private = info;
+        bio->bi_destructor = dm_rq_bio_destructor;
+
+        return 0;
+}
+
+static int setup_clone(struct request *clone, struct request *rq,
+                       struct dm_rq_target_io *tio)
+{
+        int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+                                  dm_rq_bio_constructor, tio);
+
+        if (r)
+                return r;
+
+        clone->cmd = rq->cmd;
+        clone->cmd_len = rq->cmd_len;
+        clone->sense = rq->sense;
+        clone->buffer = rq->buffer;
+        clone->end_io = end_clone_request;
+        clone->end_io_data = tio;
+
+        return 0;
+}
+
+static int dm_rq_flush_suspending(struct mapped_device *md)
+{
+        return !md->suspend_rq.special;
+}
+
+/*
+ * Called with the queue lock held.
+ */
+static int dm_prep_fn(struct request_queue *q, struct request *rq)
+{
+        struct mapped_device *md = q->queuedata;
+        struct dm_rq_target_io *tio;
+        struct request *clone;
+
+        if (unlikely(rq == &md->suspend_rq)) {
+                if (dm_rq_flush_suspending(md))
+                        return BLKPREP_OK;
+                else
+                        /* The flush suspend was interrupted */
+                        return BLKPREP_KILL;
+        }
+
+        if (unlikely(rq->special)) {
+                DMWARN("Already has something in rq->special.");
+                return BLKPREP_KILL;
+        }
+
+        tio = alloc_rq_tio(md); /* Only one for each original request */
+        if (!tio)
+                /* -ENOMEM */
+                return BLKPREP_DEFER;
+
+        tio->md = md;
+        tio->ti = NULL;
+        tio->orig = rq;
+        tio->error = 0;
+        memset(&tio->info, 0, sizeof(tio->info));
+
+        clone = &tio->clone;
+        if (setup_clone(clone, rq, tio)) {
+                /* -ENOMEM */
+                free_rq_tio(tio);
+                return BLKPREP_DEFER;
+        }
+
+        rq->special = clone;
+        rq->cmd_flags |= REQ_DONTPREP;
+
+        return BLKPREP_OK;
+}
+
+static void map_request(struct dm_target *ti, struct request *rq,
+                        struct mapped_device *md)
+{
+        int r;
+        struct request *clone = rq->special;
+        struct dm_rq_target_io *tio = clone->end_io_data;
+
+        /*
+         * Hold the md reference here for the in-flight I/O.
+         * We can't rely on the reference count by device opener,
+         * because the device may be closed during the request completion
+         * when all bios are completed.
+         * See the comment in rq_completed() too.
+         */
+        dm_get(md);
+
+        tio->ti = ti;
+        r = ti->type->map_rq(ti, clone, &tio->info);
+        switch (r) {
+        case DM_MAPIO_SUBMITTED:
+                /* The target has taken the I/O to submit by itself later */
+                break;
+        case DM_MAPIO_REMAPPED:
+                /* The target has remapped the I/O so dispatch it */
+                dm_dispatch_request(clone);
+                break;
+        case DM_MAPIO_REQUEUE:
+                /* The target wants to requeue the I/O */
+                dm_requeue_unmapped_request(clone);
+                break;
+        default:
+                if (r > 0) {
+                        DMWARN("unimplemented target map return value: %d", r);
+                        BUG();
+                }
+
+                /* The target wants to complete the I/O */
+                dm_kill_unmapped_request(clone, r);
+                break;
+        }
+}
+
+/*
+ * q->request_fn for request-based dm.
+ * Called with the queue lock held.
+ */
+static void dm_request_fn(struct request_queue *q)
+{
+        struct mapped_device *md = q->queuedata;
+        struct dm_table *map = dm_get_table(md);
+        struct dm_target *ti;
+        struct request *rq;
+
+        /*
+         * For noflush suspend, check blk_queue_stopped() to immediately
+         * quit I/O dispatching.
+         */
+        while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+                rq = blk_peek_request(q);
+                if (!rq)
+                        goto plug_and_out;
+
+                if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend marker */
+                        if (queue_in_flight(q))
+                                /* Not quiet yet.  Wait more */
+                                goto plug_and_out;
+
+                        /* This device should be quiet now */
+                        __stop_queue(q);
+                        blk_start_request(rq);
+                        __blk_end_request_all(rq, 0);
+                        wake_up(&md->wait);
+                        goto out;
+                }
+
+                ti = dm_table_find_target(map, blk_rq_pos(rq));
+                if (ti->type->busy && ti->type->busy(ti))
+                        goto plug_and_out;
+
+                blk_start_request(rq);
+                spin_unlock(q->queue_lock);
+                map_request(ti, rq, md);
+                spin_lock_irq(q->queue_lock);
+        }
+
+        goto out;
+
+plug_and_out:
+        if (!elv_queue_empty(q))
+                /* Some requests still remain, retry later */
+                blk_plug_device(q);
+
+out:
+        dm_table_put(map);
+
+        return;
+}
+
+int dm_underlying_device_busy(struct request_queue *q)
+{
+        return blk_lld_busy(q);
+}
+EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
+
+static int dm_lld_busy(struct request_queue *q)
+{
+        int r;
+        struct mapped_device *md = q->queuedata;
+        struct dm_table *map = dm_get_table(md);
+
+        if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
+                r = 1;
+        else
+                r = dm_table_any_busy_target(map);
+
+        dm_table_put(map);
+
+        return r;
+}
+
 static void dm_unplug_all(struct request_queue *q)
 {
         struct mapped_device *md = q->queuedata;
         struct dm_table *map = dm_get_table(md);
 
         if (map) {
+                if (dm_request_based(md))
+                        generic_unplug_device(q);
+
                 dm_table_unplug_all(map);
                 dm_table_put(map);
         }
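map_request() above drives the target's map_rq() hook and interprets its return value in the switch statement. A rough sketch of a map_rq() implementation in the dm-multipath style; path selection via example_choose_path() is hypothetical and not part of this patch:

/* Hypothetical map_rq hook, driven by map_request()/dm_request_fn(). */
static int example_map_rq(struct dm_target *ti, struct request *clone,
                          union map_info *map_context)
{
        struct block_device *bdev = example_choose_path(ti);    /* hypothetical */

        if (!bdev)
                return DM_MAPIO_REQUEUE;        /* nothing usable now, retry later */

        /* Redirect the already-prepared clone to the underlying device. */
        clone->q = bdev_get_queue(bdev);
        clone->rq_disk = bdev->bd_disk;

        return DM_MAPIO_REMAPPED;               /* core dm calls dm_dispatch_request() */
}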
@@ -1055,7 +1607,16 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
         if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
                 map = dm_get_table(md);
                 if (map) {
-                        r = dm_table_any_congested(map, bdi_bits);
+                        /*
+                         * Request-based dm cares only about its own queue
+                         * when queried for the congestion status of the request_queue.
+                         */
+                        if (dm_request_based(md))
+                                r = md->queue->backing_dev_info.state &
+                                    bdi_bits;
+                        else
+                                r = dm_table_any_congested(map, bdi_bits);
+
                         dm_table_put(map);
                 }
         }
@@ -1458,6 +2019,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 {
         int r = 0;
         DECLARE_WAITQUEUE(wait, current);
+        struct request_queue *q = md->queue;
+        unsigned long flags;
 
         dm_unplug_all(md->queue);
 
@@ -1467,7 +2030,14 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
                 set_current_state(interruptible);
 
                 smp_mb();
-                if (!atomic_read(&md->pending))
+                if (dm_request_based(md)) {
+                        spin_lock_irqsave(q->queue_lock, flags);
+                        if (!queue_in_flight(q) && blk_queue_stopped(q)) {
+                                spin_unlock_irqrestore(q->queue_lock, flags);
+                                break;
+                        }
+                        spin_unlock_irqrestore(q->queue_lock, flags);
+                } else if (!atomic_read(&md->pending))
                         break;
 
                 if (interruptible == TASK_INTERRUPTIBLE &&
@@ -1584,6 +2154,67 @@ out:
         return r;
 }
 
+static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
+{
+        md->suspend_rq.special = (void *)0x1;
+}
+
+static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
+{
+        struct request_queue *q = md->queue;
+        unsigned long flags;
+
+        spin_lock_irqsave(q->queue_lock, flags);
+        if (!noflush)
+                dm_rq_invalidate_suspend_marker(md);
+        __start_queue(q);
+        spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
+{
+        struct request *rq = &md->suspend_rq;
+        struct request_queue *q = md->queue;
+
+        if (noflush)
+                stop_queue(q);
+        else {
+                blk_rq_init(q, rq);
+                blk_insert_request(q, rq, 0, NULL);
+        }
+}
+
+static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
+{
+        int r = 1;
+        struct request *rq = &md->suspend_rq;
+        struct request_queue *q = md->queue;
+        unsigned long flags;
+
+        if (noflush)
+                return r;
+
+        /* The marker must be protected by queue lock if it is in use */
+        spin_lock_irqsave(q->queue_lock, flags);
+        if (unlikely(rq->ref_count)) {
+                /*
+                 * This can happen when the previous flush suspend was
+                 * interrupted: the marker is still in the queue because
+                 * we don't remove it at the time of the interruption,
+                 * and this flush suspend has now been invoked.
+                 * We have only one marker per mapped_device, so we can't
+                 * start another flush suspend while it is in use.
+                 */
+                BUG_ON(!rq->special); /* The marker should be invalidated */
+                DMWARN("Invalidating the previous flush suspend is still in"
+                       " progress.  Please retry later.");
+                r = 0;
+        }
+        spin_unlock_irqrestore(q->queue_lock, flags);
+
+        return r;
+}
+
 /*
  * Functions to lock and unlock any filesystem running on the
  * device.
@@ -1623,6 +2254,53 @@ static void unlock_fs(struct mapped_device *md)
  * dm_bind_table, dm_suspend must be called to flush any in
  * flight bios and ensure that any further io gets deferred.
  */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * After the suspend starts, further incoming requests are kept in
+ * the request_queue and deferred.
+ * Remaining requests in the request_queue at the start of suspend are flushed
+ * if it is a flush suspend.
+ * The suspend completes when the following conditions have been satisfied,
+ * so wait for them:
+ *   1. q->in_flight is 0 (which means no in_flight request)
+ *   2. queue has been stopped (which means no request dispatching)
+ *
+ *
+ * Noflush suspend
+ * ---------------
+ * Noflush suspend doesn't need to dispatch remaining requests.
+ * So stop the queue immediately.  Then, wait for all in_flight requests
+ * to be completed or requeued.
+ *
+ * To abort noflush suspend, start the queue.
+ *
+ *
+ * Flush suspend
+ * -------------
+ * Flush suspend needs to dispatch remaining requests.  So stop the queue
+ * after the remaining requests are completed.  (Requeued requests must also
+ * be re-dispatched and completed.  Until then, we can't stop the queue.)
+ *
+ * During flushing the remaining requests, further incoming requests are also
+ * inserted into the same queue.  To distinguish which requests are to be
+ * flushed, we insert a marker request into the queue at the time of starting
+ * the flush suspend, like a barrier.
+ * Dispatching is blocked when the marker is found at the head of the queue.
+ * And the queue is stopped when all in_flight requests are completed, since
+ * that means the remaining requests are completely flushed.
+ * Then, the marker is removed from the queue.
+ *
+ * To abort a flush suspend, we also need to take care of the marker, not only
+ * starting the queue.
+ * We don't remove the marker forcibly from the queue since that's against
+ * the block-layer manner.  Instead, we put an invalidated mark on the marker.
+ * When the invalidated marker is found at the head of the queue, it is
+ * immediately removed from the queue, so it doesn't block dispatching.
+ * Because we have only one marker per mapped_device, we can't start another
+ * flush suspend until the invalidated marker is removed from the queue.
+ * So fail and return with -EBUSY in such a case.
+ */
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
         struct dm_table *map = NULL;
@@ -1637,6 +2315,11 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
                 goto out_unlock;
         }
 
+        if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
+                r = -EBUSY;
+                goto out_unlock;
+        }
+
         map = dm_get_table(md);
 
         /*
@@ -1682,6 +2365,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
         flush_workqueue(md->wq);
 
+        if (dm_request_based(md))
+                dm_rq_start_suspend(md, noflush);
+
         /*
          * At this point no more requests are entering target request routines.
          * We call dm_wait_for_completion to wait for all existing requests
@@ -1698,6 +2384,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
         if (r < 0) {
                 dm_queue_flush(md);
 
+                if (dm_request_based(md))
+                        dm_rq_abort_suspend(md, noflush);
+
                 unlock_fs(md);
                 goto out; /* pushback list is already flushed, so skip flush */
         }
@@ -1739,6 +2428,14 @@ int dm_resume(struct mapped_device *md)
 
         dm_queue_flush(md);
 
+        /*
+         * Flushing deferred I/Os must be done after targets are resumed
+         * so that mapping of targets can work correctly.
+         * Request-based dm is queueing the deferred I/Os in its request_queue.
+         */
+        if (dm_request_based(md))
+                start_queue(md->queue);
+
         unlock_fs(md);
 
         clear_bit(DMF_SUSPENDED, &md->flags);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 604e85caadf6..8dcabb1caff1 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -50,6 +50,7 @@ void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
+int dm_table_any_busy_target(struct dm_table *t);
 
 /*
  * To check the return value from dm_table_find_target().
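Tying the busy plumbing together: dm_lld_busy() asks dm_table_any_busy_target(), which polls each target's .busy hook, and a target can in turn delegate that decision to its underlying device through the dm_underlying_device_busy() wrapper added in dm.c. A hedged sketch, reusing the hypothetical example_* naming from the earlier sketches:

/* Hypothetical: a target's .busy hook delegating to the underlying queue. */
static int example_delegating_busy(struct dm_target *ti)
{
        struct example_ctx *c = ti->private;    /* hypothetical; c->dev is a struct dm_dev * */

        return dm_underlying_device_busy(bdev_get_queue(c->dev->bdev));
}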