author     Linus Torvalds <torvalds@linux-foundation.org>    2010-10-22 20:07:18 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2010-10-22 20:07:18 -0400
commit     a2887097f25cd38cadfc11d10769e2b349fb5eca (patch)
tree       cd4adcb305365d6ba9acd2c02d4eb9d0125c6f8d /drivers/md/dm.c
parent     8abfc6e7a45eb74e51904bbae676fae008b11366 (diff)
parent     005a1d15f5a6b2bb4ada80349513effbf22b4588 (diff)
Merge branch 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block: (46 commits)
xen-blkfront: disable barrier/flush write support
Added blk-lib.c and blk-barrier.c was renamed to blk-flush.c
block: remove BLKDEV_IFL_WAIT
aic7xxx_old: removed unused 'req' variable
block: remove the BH_Eopnotsupp flag
block: remove the BLKDEV_IFL_BARRIER flag
block: remove the WRITE_BARRIER flag
swap: do not send discards as barriers
fat: do not send discards as barriers
ext4: do not send discards as barriers
jbd2: replace barriers with explicit flush / FUA usage
jbd2: Modify ASYNC_COMMIT code to not rely on queue draining on barrier
jbd: replace barriers with explicit flush / FUA usage
nilfs2: replace barriers with explicit flush / FUA usage
reiserfs: replace barriers with explicit flush / FUA usage
gfs2: replace barriers with explicit flush / FUA usage
btrfs: replace barriers with explicit flush / FUA usage
xfs: replace barriers with explicit flush / FUA usage
block: pass gfp_mask and flags to sb_issue_discard
dm: convey that all flushes are processed as empty
...
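
The common thread in these commits is the move from queue-draining barriers to an explicit cache flush plus FUA writes. As a rough sketch only (kernel-style code that assumes the 2.6.37-era block helpers and compiles only in-tree; example_flush_cache and example_write_commit_record are made-up names, not code from any of these patches), the two primitives callers use afterwards look like this:

```c
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>

/* An explicit, synchronous cache flush replaces an empty barrier. */
static int example_flush_cache(struct block_device *bdev)
{
	/* The BLKDEV_IFL_WAIT/BARRIER flags are gone; this waits by default. */
	return blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
}

/*
 * A journal commit record goes out with an explicit preflush and FUA
 * instead of a queue-draining WRITE_BARRIER.
 */
static int example_write_commit_record(struct buffer_head *bh)
{
	return submit_bh(WRITE_FLUSH_FUA, bh);
}
```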
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--  drivers/md/dm.c  398
1 file changed, 81 insertions, 317 deletions
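
The queue-setup side of the conversion shows up in the dm_init_md_queue hunk further down: blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH) is dropped and the queue instead advertises flush/FUA support. A minimal driver-side sketch, assuming the 2.6.37 block API (example_init_queue is a hypothetical name, not dm code):

```c
#include <linux/blkdev.h>

static void example_init_queue(struct request_queue *q)
{
	/*
	 * Advertise a volatile write cache that also supports FUA writes.
	 * A write-through device would pass 0 (or skip the call entirely),
	 * and REQ_FLUSH/REQ_FUA requests then complete without a real flush.
	 */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
}
```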
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7967eca5a2d5..7cb1352f7e7a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | |||
110 | #define DMF_FREEING 3 | 110 | #define DMF_FREEING 3 |
111 | #define DMF_DELETING 4 | 111 | #define DMF_DELETING 4 |
112 | #define DMF_NOFLUSH_SUSPENDING 5 | 112 | #define DMF_NOFLUSH_SUSPENDING 5 |
113 | #define DMF_QUEUE_IO_TO_THREAD 6 | ||
114 | 113 | ||
115 | /* | 114 | /* |
116 | * Work processed by per-device workqueue. | 115 | * Work processed by per-device workqueue. |
@@ -144,24 +143,9 @@ struct mapped_device { | |||
144 | spinlock_t deferred_lock; | 143 | spinlock_t deferred_lock; |
145 | 144 | ||
146 | /* | 145 | /* |
147 | * An error from the barrier request currently being processed. | 146 | * Processing queue (flush) |
148 | */ | ||
149 | int barrier_error; | ||
150 | |||
151 | /* | ||
152 | * Protect barrier_error from concurrent endio processing | ||
153 | * in request-based dm. | ||
154 | */ | ||
155 | spinlock_t barrier_error_lock; | ||
156 | |||
157 | /* | ||
158 | * Processing queue (flush/barriers) | ||
159 | */ | 147 | */ |
160 | struct workqueue_struct *wq; | 148 | struct workqueue_struct *wq; |
161 | struct work_struct barrier_work; | ||
162 | |||
163 | /* A pointer to the currently processing pre/post flush request */ | ||
164 | struct request *flush_request; | ||
165 | 149 | ||
166 | /* | 150 | /* |
167 | * The current mapping. | 151 | * The current mapping. |
@@ -200,8 +184,8 @@ struct mapped_device { | |||
200 | /* sysfs handle */ | 184 | /* sysfs handle */ |
201 | struct kobject kobj; | 185 | struct kobject kobj; |
202 | 186 | ||
203 | /* zero-length barrier that will be cloned and submitted to targets */ | 187 | /* zero-length flush that will be cloned and submitted to targets */ |
204 | struct bio barrier_bio; | 188 | struct bio flush_bio; |
205 | }; | 189 | }; |
206 | 190 | ||
207 | /* | 191 | /* |
@@ -512,7 +496,7 @@ static void end_io_acct(struct dm_io *io) | |||
512 | 496 | ||
513 | /* | 497 | /* |
514 | * After this is decremented the bio must not be touched if it is | 498 | * After this is decremented the bio must not be touched if it is |
515 | * a barrier. | 499 | * a flush. |
516 | */ | 500 | */ |
517 | dm_disk(md)->part0.in_flight[rw] = pending = | 501 | dm_disk(md)->part0.in_flight[rw] = pending = |
518 | atomic_dec_return(&md->pending[rw]); | 502 | atomic_dec_return(&md->pending[rw]); |
@@ -528,16 +512,12 @@ static void end_io_acct(struct dm_io *io) | |||
528 | */ | 512 | */ |
529 | static void queue_io(struct mapped_device *md, struct bio *bio) | 513 | static void queue_io(struct mapped_device *md, struct bio *bio) |
530 | { | 514 | { |
531 | down_write(&md->io_lock); | 515 | unsigned long flags; |
532 | 516 | ||
533 | spin_lock_irq(&md->deferred_lock); | 517 | spin_lock_irqsave(&md->deferred_lock, flags); |
534 | bio_list_add(&md->deferred, bio); | 518 | bio_list_add(&md->deferred, bio); |
535 | spin_unlock_irq(&md->deferred_lock); | 519 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
536 | 520 | queue_work(md->wq, &md->work); | |
537 | if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) | ||
538 | queue_work(md->wq, &md->work); | ||
539 | |||
540 | up_write(&md->io_lock); | ||
541 | } | 521 | } |
542 | 522 | ||
543 | /* | 523 | /* |
@@ -625,11 +605,9 @@ static void dec_pending(struct dm_io *io, int error) | |||
625 | * Target requested pushing back the I/O. | 605 | * Target requested pushing back the I/O. |
626 | */ | 606 | */ |
627 | spin_lock_irqsave(&md->deferred_lock, flags); | 607 | spin_lock_irqsave(&md->deferred_lock, flags); |
628 | if (__noflush_suspending(md)) { | 608 | if (__noflush_suspending(md)) |
629 | if (!(io->bio->bi_rw & REQ_HARDBARRIER)) | 609 | bio_list_add_head(&md->deferred, io->bio); |
630 | bio_list_add_head(&md->deferred, | 610 | else |
631 | io->bio); | ||
632 | } else | ||
633 | /* noflush suspend was interrupted. */ | 611 | /* noflush suspend was interrupted. */ |
634 | io->error = -EIO; | 612 | io->error = -EIO; |
635 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 613 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
@@ -637,32 +615,23 @@ static void dec_pending(struct dm_io *io, int error) | |||
637 | 615 | ||
638 | io_error = io->error; | 616 | io_error = io->error; |
639 | bio = io->bio; | 617 | bio = io->bio; |
618 | end_io_acct(io); | ||
619 | free_io(md, io); | ||
620 | |||
621 | if (io_error == DM_ENDIO_REQUEUE) | ||
622 | return; | ||
640 | 623 | ||
641 | if (bio->bi_rw & REQ_HARDBARRIER) { | 624 | if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) { |
642 | /* | 625 | /* |
643 | * There can be just one barrier request so we use | 626 | * Preflush done for flush with data, reissue |
644 | * a per-device variable for error reporting. | 627 | * without REQ_FLUSH. |
645 | * Note that you can't touch the bio after end_io_acct | ||
646 | * | ||
647 | * We ignore -EOPNOTSUPP for empty flush reported by | ||
648 | * underlying devices. We assume that if the device | ||
649 | * doesn't support empty barriers, it doesn't need | ||
650 | * cache flushing commands. | ||
651 | */ | 628 | */ |
652 | if (!md->barrier_error && | 629 | bio->bi_rw &= ~REQ_FLUSH; |
653 | !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) | 630 | queue_io(md, bio); |
654 | md->barrier_error = io_error; | ||
655 | end_io_acct(io); | ||
656 | free_io(md, io); | ||
657 | } else { | 631 | } else { |
658 | end_io_acct(io); | 632 | /* done with normal IO or empty flush */ |
659 | free_io(md, io); | 633 | trace_block_bio_complete(md->queue, bio); |
660 | 634 | bio_endio(bio, io_error); | |
661 | if (io_error != DM_ENDIO_REQUEUE) { | ||
662 | trace_block_bio_complete(md->queue, bio); | ||
663 | |||
664 | bio_endio(bio, io_error); | ||
665 | } | ||
666 | } | 635 | } |
667 | } | 636 | } |
668 | } | 637 | } |
@@ -755,23 +724,6 @@ static void end_clone_bio(struct bio *clone, int error) | |||
755 | blk_update_request(tio->orig, 0, nr_bytes); | 724 | blk_update_request(tio->orig, 0, nr_bytes); |
756 | } | 725 | } |
757 | 726 | ||
758 | static void store_barrier_error(struct mapped_device *md, int error) | ||
759 | { | ||
760 | unsigned long flags; | ||
761 | |||
762 | spin_lock_irqsave(&md->barrier_error_lock, flags); | ||
763 | /* | ||
764 | * Basically, the first error is taken, but: | ||
765 | * -EOPNOTSUPP supersedes any I/O error. | ||
766 | * Requeue request supersedes any I/O error but -EOPNOTSUPP. | ||
767 | */ | ||
768 | if (!md->barrier_error || error == -EOPNOTSUPP || | ||
769 | (md->barrier_error != -EOPNOTSUPP && | ||
770 | error == DM_ENDIO_REQUEUE)) | ||
771 | md->barrier_error = error; | ||
772 | spin_unlock_irqrestore(&md->barrier_error_lock, flags); | ||
773 | } | ||
774 | |||
775 | /* | 727 | /* |
776 | * Don't touch any member of the md after calling this function because | 728 | * Don't touch any member of the md after calling this function because |
777 | * the md may be freed in dm_put() at the end of this function. | 729 | * the md may be freed in dm_put() at the end of this function. |
@@ -809,13 +761,11 @@ static void free_rq_clone(struct request *clone) | |||
809 | static void dm_end_request(struct request *clone, int error) | 761 | static void dm_end_request(struct request *clone, int error) |
810 | { | 762 | { |
811 | int rw = rq_data_dir(clone); | 763 | int rw = rq_data_dir(clone); |
812 | int run_queue = 1; | ||
813 | bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER; | ||
814 | struct dm_rq_target_io *tio = clone->end_io_data; | 764 | struct dm_rq_target_io *tio = clone->end_io_data; |
815 | struct mapped_device *md = tio->md; | 765 | struct mapped_device *md = tio->md; |
816 | struct request *rq = tio->orig; | 766 | struct request *rq = tio->orig; |
817 | 767 | ||
818 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) { | 768 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
819 | rq->errors = clone->errors; | 769 | rq->errors = clone->errors; |
820 | rq->resid_len = clone->resid_len; | 770 | rq->resid_len = clone->resid_len; |
821 | 771 | ||
@@ -829,15 +779,8 @@ static void dm_end_request(struct request *clone, int error) | |||
829 | } | 779 | } |
830 | 780 | ||
831 | free_rq_clone(clone); | 781 | free_rq_clone(clone); |
832 | 782 | blk_end_request_all(rq, error); | |
833 | if (unlikely(is_barrier)) { | 783 | rq_completed(md, rw, true); |
834 | if (unlikely(error)) | ||
835 | store_barrier_error(md, error); | ||
836 | run_queue = 0; | ||
837 | } else | ||
838 | blk_end_request_all(rq, error); | ||
839 | |||
840 | rq_completed(md, rw, run_queue); | ||
841 | } | 784 | } |
842 | 785 | ||
843 | static void dm_unprep_request(struct request *rq) | 786 | static void dm_unprep_request(struct request *rq) |
@@ -862,16 +805,6 @@ void dm_requeue_unmapped_request(struct request *clone) | |||
862 | struct request_queue *q = rq->q; | 805 | struct request_queue *q = rq->q; |
863 | unsigned long flags; | 806 | unsigned long flags; |
864 | 807 | ||
865 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
866 | /* | ||
867 | * Barrier clones share an original request. | ||
868 | * Leave it to dm_end_request(), which handles this special | ||
869 | * case. | ||
870 | */ | ||
871 | dm_end_request(clone, DM_ENDIO_REQUEUE); | ||
872 | return; | ||
873 | } | ||
874 | |||
875 | dm_unprep_request(rq); | 808 | dm_unprep_request(rq); |
876 | 809 | ||
877 | spin_lock_irqsave(q->queue_lock, flags); | 810 | spin_lock_irqsave(q->queue_lock, flags); |
@@ -961,19 +894,6 @@ static void dm_complete_request(struct request *clone, int error) | |||
961 | struct dm_rq_target_io *tio = clone->end_io_data; | 894 | struct dm_rq_target_io *tio = clone->end_io_data; |
962 | struct request *rq = tio->orig; | 895 | struct request *rq = tio->orig; |
963 | 896 | ||
964 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
965 | /* | ||
966 | * Barrier clones share an original request. So can't use | ||
967 | * softirq_done with the original. | ||
968 | * Pass the clone to dm_done() directly in this special case. | ||
969 | * It is safe (even if clone->q->queue_lock is held here) | ||
970 | * because there is no I/O dispatching during the completion | ||
971 | * of barrier clone. | ||
972 | */ | ||
973 | dm_done(clone, error, true); | ||
974 | return; | ||
975 | } | ||
976 | |||
977 | tio->error = error; | 897 | tio->error = error; |
978 | rq->completion_data = clone; | 898 | rq->completion_data = clone; |
979 | blk_complete_request(rq); | 899 | blk_complete_request(rq); |
@@ -990,17 +910,6 @@ void dm_kill_unmapped_request(struct request *clone, int error) | |||
990 | struct dm_rq_target_io *tio = clone->end_io_data; | 910 | struct dm_rq_target_io *tio = clone->end_io_data; |
991 | struct request *rq = tio->orig; | 911 | struct request *rq = tio->orig; |
992 | 912 | ||
993 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
994 | /* | ||
995 | * Barrier clones share an original request. | ||
996 | * Leave it to dm_end_request(), which handles this special | ||
997 | * case. | ||
998 | */ | ||
999 | BUG_ON(error > 0); | ||
1000 | dm_end_request(clone, error); | ||
1001 | return; | ||
1002 | } | ||
1003 | |||
1004 | rq->cmd_flags |= REQ_FAILED; | 913 | rq->cmd_flags |= REQ_FAILED; |
1005 | dm_complete_request(clone, error); | 914 | dm_complete_request(clone, error); |
1006 | } | 915 | } |
@@ -1119,7 +1028,7 @@ static void dm_bio_destructor(struct bio *bio) | |||
1119 | } | 1028 | } |
1120 | 1029 | ||
1121 | /* | 1030 | /* |
1122 | * Creates a little bio that is just does part of a bvec. | 1031 | * Creates a little bio that just does part of a bvec. |
1123 | */ | 1032 | */ |
1124 | static struct bio *split_bvec(struct bio *bio, sector_t sector, | 1033 | static struct bio *split_bvec(struct bio *bio, sector_t sector, |
1125 | unsigned short idx, unsigned int offset, | 1034 | unsigned short idx, unsigned int offset, |
@@ -1134,7 +1043,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
1134 | 1043 | ||
1135 | clone->bi_sector = sector; | 1044 | clone->bi_sector = sector; |
1136 | clone->bi_bdev = bio->bi_bdev; | 1045 | clone->bi_bdev = bio->bi_bdev; |
1137 | clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; | 1046 | clone->bi_rw = bio->bi_rw; |
1138 | clone->bi_vcnt = 1; | 1047 | clone->bi_vcnt = 1; |
1139 | clone->bi_size = to_bytes(len); | 1048 | clone->bi_size = to_bytes(len); |
1140 | clone->bi_io_vec->bv_offset = offset; | 1049 | clone->bi_io_vec->bv_offset = offset; |
@@ -1161,7 +1070,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
1161 | 1070 | ||
1162 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); | 1071 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); |
1163 | __bio_clone(clone, bio); | 1072 | __bio_clone(clone, bio); |
1164 | clone->bi_rw &= ~REQ_HARDBARRIER; | ||
1165 | clone->bi_destructor = dm_bio_destructor; | 1073 | clone->bi_destructor = dm_bio_destructor; |
1166 | clone->bi_sector = sector; | 1074 | clone->bi_sector = sector; |
1167 | clone->bi_idx = idx; | 1075 | clone->bi_idx = idx; |
@@ -1225,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, | |||
1225 | __issue_target_request(ci, ti, request_nr, len); | 1133 | __issue_target_request(ci, ti, request_nr, len); |
1226 | } | 1134 | } |
1227 | 1135 | ||
1228 | static int __clone_and_map_empty_barrier(struct clone_info *ci) | 1136 | static int __clone_and_map_empty_flush(struct clone_info *ci) |
1229 | { | 1137 | { |
1230 | unsigned target_nr = 0; | 1138 | unsigned target_nr = 0; |
1231 | struct dm_target *ti; | 1139 | struct dm_target *ti; |
1232 | 1140 | ||
1141 | BUG_ON(bio_has_data(ci->bio)); | ||
1233 | while ((ti = dm_table_get_target(ci->map, target_nr++))) | 1142 | while ((ti = dm_table_get_target(ci->map, target_nr++))) |
1234 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); | 1143 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); |
1235 | 1144 | ||
1236 | ci->sector_count = 0; | ||
1237 | |||
1238 | return 0; | 1145 | return 0; |
1239 | } | 1146 | } |
1240 | 1147 | ||
@@ -1289,9 +1196,6 @@ static int __clone_and_map(struct clone_info *ci) | |||
1289 | sector_t len = 0, max; | 1196 | sector_t len = 0, max; |
1290 | struct dm_target_io *tio; | 1197 | struct dm_target_io *tio; |
1291 | 1198 | ||
1292 | if (unlikely(bio_empty_barrier(bio))) | ||
1293 | return __clone_and_map_empty_barrier(ci); | ||
1294 | |||
1295 | if (unlikely(bio->bi_rw & REQ_DISCARD)) | 1199 | if (unlikely(bio->bi_rw & REQ_DISCARD)) |
1296 | return __clone_and_map_discard(ci); | 1200 | return __clone_and_map_discard(ci); |
1297 | 1201 | ||
@@ -1383,16 +1287,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
1383 | 1287 | ||
1384 | ci.map = dm_get_live_table(md); | 1288 | ci.map = dm_get_live_table(md); |
1385 | if (unlikely(!ci.map)) { | 1289 | if (unlikely(!ci.map)) { |
1386 | if (!(bio->bi_rw & REQ_HARDBARRIER)) | 1290 | bio_io_error(bio); |
1387 | bio_io_error(bio); | ||
1388 | else | ||
1389 | if (!md->barrier_error) | ||
1390 | md->barrier_error = -EIO; | ||
1391 | return; | 1291 | return; |
1392 | } | 1292 | } |
1393 | 1293 | ||
1394 | ci.md = md; | 1294 | ci.md = md; |
1395 | ci.bio = bio; | ||
1396 | ci.io = alloc_io(md); | 1295 | ci.io = alloc_io(md); |
1397 | ci.io->error = 0; | 1296 | ci.io->error = 0; |
1398 | atomic_set(&ci.io->io_count, 1); | 1297 | atomic_set(&ci.io->io_count, 1); |
@@ -1400,14 +1299,20 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
1400 | ci.io->md = md; | 1299 | ci.io->md = md; |
1401 | spin_lock_init(&ci.io->endio_lock); | 1300 | spin_lock_init(&ci.io->endio_lock); |
1402 | ci.sector = bio->bi_sector; | 1301 | ci.sector = bio->bi_sector; |
1403 | ci.sector_count = bio_sectors(bio); | ||
1404 | if (unlikely(bio_empty_barrier(bio))) | ||
1405 | ci.sector_count = 1; | ||
1406 | ci.idx = bio->bi_idx; | 1302 | ci.idx = bio->bi_idx; |
1407 | 1303 | ||
1408 | start_io_acct(ci.io); | 1304 | start_io_acct(ci.io); |
1409 | while (ci.sector_count && !error) | 1305 | if (bio->bi_rw & REQ_FLUSH) { |
1410 | error = __clone_and_map(&ci); | 1306 | ci.bio = &ci.md->flush_bio; |
1307 | ci.sector_count = 0; | ||
1308 | error = __clone_and_map_empty_flush(&ci); | ||
1309 | /* dec_pending submits any data associated with flush */ | ||
1310 | } else { | ||
1311 | ci.bio = bio; | ||
1312 | ci.sector_count = bio_sectors(bio); | ||
1313 | while (ci.sector_count && !error) | ||
1314 | error = __clone_and_map(&ci); | ||
1315 | } | ||
1411 | 1316 | ||
1412 | /* drop the extra reference count */ | 1317 | /* drop the extra reference count */ |
1413 | dec_pending(ci.io, error); | 1318 | dec_pending(ci.io, error); |
@@ -1491,22 +1396,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio) | |||
1491 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); | 1396 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); |
1492 | part_stat_unlock(); | 1397 | part_stat_unlock(); |
1493 | 1398 | ||
1494 | /* | 1399 | /* if we're suspended, we have to queue this io for later */ |
1495 | * If we're suspended or the thread is processing barriers | 1400 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { |
1496 | * we have to queue this io for later. | ||
1497 | */ | ||
1498 | if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || | ||
1499 | unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | ||
1500 | up_read(&md->io_lock); | 1401 | up_read(&md->io_lock); |
1501 | 1402 | ||
1502 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && | 1403 | if (bio_rw(bio) != READA) |
1503 | bio_rw(bio) == READA) { | 1404 | queue_io(md, bio); |
1405 | else | ||
1504 | bio_io_error(bio); | 1406 | bio_io_error(bio); |
1505 | return 0; | ||
1506 | } | ||
1507 | |||
1508 | queue_io(md, bio); | ||
1509 | |||
1510 | return 0; | 1407 | return 0; |
1511 | } | 1408 | } |
1512 | 1409 | ||
@@ -1537,14 +1434,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
1537 | return _dm_request(q, bio); | 1434 | return _dm_request(q, bio); |
1538 | } | 1435 | } |
1539 | 1436 | ||
1540 | static bool dm_rq_is_flush_request(struct request *rq) | ||
1541 | { | ||
1542 | if (rq->cmd_flags & REQ_FLUSH) | ||
1543 | return true; | ||
1544 | else | ||
1545 | return false; | ||
1546 | } | ||
1547 | |||
1548 | void dm_dispatch_request(struct request *rq) | 1437 | void dm_dispatch_request(struct request *rq) |
1549 | { | 1438 | { |
1550 | int r; | 1439 | int r; |
@@ -1592,22 +1481,15 @@ static int setup_clone(struct request *clone, struct request *rq, | |||
1592 | { | 1481 | { |
1593 | int r; | 1482 | int r; |
1594 | 1483 | ||
1595 | if (dm_rq_is_flush_request(rq)) { | 1484 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, |
1596 | blk_rq_init(NULL, clone); | 1485 | dm_rq_bio_constructor, tio); |
1597 | clone->cmd_type = REQ_TYPE_FS; | 1486 | if (r) |
1598 | clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); | 1487 | return r; |
1599 | } else { | ||
1600 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | ||
1601 | dm_rq_bio_constructor, tio); | ||
1602 | if (r) | ||
1603 | return r; | ||
1604 | |||
1605 | clone->cmd = rq->cmd; | ||
1606 | clone->cmd_len = rq->cmd_len; | ||
1607 | clone->sense = rq->sense; | ||
1608 | clone->buffer = rq->buffer; | ||
1609 | } | ||
1610 | 1488 | ||
1489 | clone->cmd = rq->cmd; | ||
1490 | clone->cmd_len = rq->cmd_len; | ||
1491 | clone->sense = rq->sense; | ||
1492 | clone->buffer = rq->buffer; | ||
1611 | clone->end_io = end_clone_request; | 1493 | clone->end_io = end_clone_request; |
1612 | clone->end_io_data = tio; | 1494 | clone->end_io_data = tio; |
1613 | 1495 | ||
@@ -1648,9 +1530,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) | |||
1648 | struct mapped_device *md = q->queuedata; | 1530 | struct mapped_device *md = q->queuedata; |
1649 | struct request *clone; | 1531 | struct request *clone; |
1650 | 1532 | ||
1651 | if (unlikely(dm_rq_is_flush_request(rq))) | ||
1652 | return BLKPREP_OK; | ||
1653 | |||
1654 | if (unlikely(rq->special)) { | 1533 | if (unlikely(rq->special)) { |
1655 | DMWARN("Already has something in rq->special."); | 1534 | DMWARN("Already has something in rq->special."); |
1656 | return BLKPREP_KILL; | 1535 | return BLKPREP_KILL; |
@@ -1727,6 +1606,7 @@ static void dm_request_fn(struct request_queue *q) | |||
1727 | struct dm_table *map = dm_get_live_table(md); | 1606 | struct dm_table *map = dm_get_live_table(md); |
1728 | struct dm_target *ti; | 1607 | struct dm_target *ti; |
1729 | struct request *rq, *clone; | 1608 | struct request *rq, *clone; |
1609 | sector_t pos; | ||
1730 | 1610 | ||
1731 | /* | 1611 | /* |
1732 | * For suspend, check blk_queue_stopped() and increment | 1612 | * For suspend, check blk_queue_stopped() and increment |
@@ -1739,15 +1619,14 @@ static void dm_request_fn(struct request_queue *q) | |||
1739 | if (!rq) | 1619 | if (!rq) |
1740 | goto plug_and_out; | 1620 | goto plug_and_out; |
1741 | 1621 | ||
1742 | if (unlikely(dm_rq_is_flush_request(rq))) { | 1622 | /* always use block 0 to find the target for flushes for now */ |
1743 | BUG_ON(md->flush_request); | 1623 | pos = 0; |
1744 | md->flush_request = rq; | 1624 | if (!(rq->cmd_flags & REQ_FLUSH)) |
1745 | blk_start_request(rq); | 1625 | pos = blk_rq_pos(rq); |
1746 | queue_work(md->wq, &md->barrier_work); | 1626 | |
1747 | goto out; | 1627 | ti = dm_table_find_target(map, pos); |
1748 | } | 1628 | BUG_ON(!dm_target_is_valid(ti)); |
1749 | 1629 | ||
1750 | ti = dm_table_find_target(map, blk_rq_pos(rq)); | ||
1751 | if (ti->type->busy && ti->type->busy(ti)) | 1630 | if (ti->type->busy && ti->type->busy(ti)) |
1752 | goto plug_and_out; | 1631 | goto plug_and_out; |
1753 | 1632 | ||
@@ -1918,7 +1797,6 @@ out: | |||
1918 | static const struct block_device_operations dm_blk_dops; | 1797 | static const struct block_device_operations dm_blk_dops; |
1919 | 1798 | ||
1920 | static void dm_wq_work(struct work_struct *work); | 1799 | static void dm_wq_work(struct work_struct *work); |
1921 | static void dm_rq_barrier_work(struct work_struct *work); | ||
1922 | 1800 | ||
1923 | static void dm_init_md_queue(struct mapped_device *md) | 1801 | static void dm_init_md_queue(struct mapped_device *md) |
1924 | { | 1802 | { |
@@ -1940,6 +1818,7 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
1940 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 1818 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
1941 | md->queue->unplug_fn = dm_unplug_all; | 1819 | md->queue->unplug_fn = dm_unplug_all; |
1942 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 1820 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
1821 | blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); | ||
1943 | } | 1822 | } |
1944 | 1823 | ||
1945 | /* | 1824 | /* |
@@ -1972,7 +1851,6 @@ static struct mapped_device *alloc_dev(int minor) | |||
1972 | mutex_init(&md->suspend_lock); | 1851 | mutex_init(&md->suspend_lock); |
1973 | mutex_init(&md->type_lock); | 1852 | mutex_init(&md->type_lock); |
1974 | spin_lock_init(&md->deferred_lock); | 1853 | spin_lock_init(&md->deferred_lock); |
1975 | spin_lock_init(&md->barrier_error_lock); | ||
1976 | rwlock_init(&md->map_lock); | 1854 | rwlock_init(&md->map_lock); |
1977 | atomic_set(&md->holders, 1); | 1855 | atomic_set(&md->holders, 1); |
1978 | atomic_set(&md->open_count, 0); | 1856 | atomic_set(&md->open_count, 0); |
@@ -1995,7 +1873,6 @@ static struct mapped_device *alloc_dev(int minor) | |||
1995 | atomic_set(&md->pending[1], 0); | 1873 | atomic_set(&md->pending[1], 0); |
1996 | init_waitqueue_head(&md->wait); | 1874 | init_waitqueue_head(&md->wait); |
1997 | INIT_WORK(&md->work, dm_wq_work); | 1875 | INIT_WORK(&md->work, dm_wq_work); |
1998 | INIT_WORK(&md->barrier_work, dm_rq_barrier_work); | ||
1999 | init_waitqueue_head(&md->eventq); | 1876 | init_waitqueue_head(&md->eventq); |
2000 | 1877 | ||
2001 | md->disk->major = _major; | 1878 | md->disk->major = _major; |
@@ -2015,6 +1892,10 @@ static struct mapped_device *alloc_dev(int minor) | |||
2015 | if (!md->bdev) | 1892 | if (!md->bdev) |
2016 | goto bad_bdev; | 1893 | goto bad_bdev; |
2017 | 1894 | ||
1895 | bio_init(&md->flush_bio); | ||
1896 | md->flush_bio.bi_bdev = md->bdev; | ||
1897 | md->flush_bio.bi_rw = WRITE_FLUSH; | ||
1898 | |||
2018 | /* Populate the mapping, nobody knows we exist yet */ | 1899 | /* Populate the mapping, nobody knows we exist yet */ |
2019 | spin_lock(&_minor_lock); | 1900 | spin_lock(&_minor_lock); |
2020 | old_md = idr_replace(&_minor_idr, md, minor); | 1901 | old_md = idr_replace(&_minor_idr, md, minor); |
@@ -2245,7 +2126,6 @@ static int dm_init_request_based_queue(struct mapped_device *md) | |||
2245 | blk_queue_softirq_done(md->queue, dm_softirq_done); | 2126 | blk_queue_softirq_done(md->queue, dm_softirq_done); |
2246 | blk_queue_prep_rq(md->queue, dm_prep_fn); | 2127 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
2247 | blk_queue_lld_busy(md->queue, dm_lld_busy); | 2128 | blk_queue_lld_busy(md->queue, dm_lld_busy); |
2248 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); | ||
2249 | 2129 | ||
2250 | elv_register_queue(md->queue); | 2130 | elv_register_queue(md->queue); |
2251 | 2131 | ||
@@ -2406,43 +2286,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
2406 | return r; | 2286 | return r; |
2407 | } | 2287 | } |
2408 | 2288 | ||
2409 | static void dm_flush(struct mapped_device *md) | ||
2410 | { | ||
2411 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2412 | |||
2413 | bio_init(&md->barrier_bio); | ||
2414 | md->barrier_bio.bi_bdev = md->bdev; | ||
2415 | md->barrier_bio.bi_rw = WRITE_BARRIER; | ||
2416 | __split_and_process_bio(md, &md->barrier_bio); | ||
2417 | |||
2418 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2419 | } | ||
2420 | |||
2421 | static void process_barrier(struct mapped_device *md, struct bio *bio) | ||
2422 | { | ||
2423 | md->barrier_error = 0; | ||
2424 | |||
2425 | dm_flush(md); | ||
2426 | |||
2427 | if (!bio_empty_barrier(bio)) { | ||
2428 | __split_and_process_bio(md, bio); | ||
2429 | /* | ||
2430 | * If the request isn't supported, don't waste time with | ||
2431 | * the second flush. | ||
2432 | */ | ||
2433 | if (md->barrier_error != -EOPNOTSUPP) | ||
2434 | dm_flush(md); | ||
2435 | } | ||
2436 | |||
2437 | if (md->barrier_error != DM_ENDIO_REQUEUE) | ||
2438 | bio_endio(bio, md->barrier_error); | ||
2439 | else { | ||
2440 | spin_lock_irq(&md->deferred_lock); | ||
2441 | bio_list_add_head(&md->deferred, bio); | ||
2442 | spin_unlock_irq(&md->deferred_lock); | ||
2443 | } | ||
2444 | } | ||
2445 | |||
2446 | /* | 2289 | /* |
2447 | * Process the deferred bios | 2290 | * Process the deferred bios |
2448 | */ | 2291 | */ |
@@ -2452,33 +2295,27 @@ static void dm_wq_work(struct work_struct *work) | |||
2452 | work); | 2295 | work); |
2453 | struct bio *c; | 2296 | struct bio *c; |
2454 | 2297 | ||
2455 | down_write(&md->io_lock); | 2298 | down_read(&md->io_lock); |
2456 | 2299 | ||
2457 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 2300 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
2458 | spin_lock_irq(&md->deferred_lock); | 2301 | spin_lock_irq(&md->deferred_lock); |
2459 | c = bio_list_pop(&md->deferred); | 2302 | c = bio_list_pop(&md->deferred); |
2460 | spin_unlock_irq(&md->deferred_lock); | 2303 | spin_unlock_irq(&md->deferred_lock); |
2461 | 2304 | ||
2462 | if (!c) { | 2305 | if (!c) |
2463 | clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | ||
2464 | break; | 2306 | break; |
2465 | } | ||
2466 | 2307 | ||
2467 | up_write(&md->io_lock); | 2308 | up_read(&md->io_lock); |
2468 | 2309 | ||
2469 | if (dm_request_based(md)) | 2310 | if (dm_request_based(md)) |
2470 | generic_make_request(c); | 2311 | generic_make_request(c); |
2471 | else { | 2312 | else |
2472 | if (c->bi_rw & REQ_HARDBARRIER) | 2313 | __split_and_process_bio(md, c); |
2473 | process_barrier(md, c); | ||
2474 | else | ||
2475 | __split_and_process_bio(md, c); | ||
2476 | } | ||
2477 | 2314 | ||
2478 | down_write(&md->io_lock); | 2315 | down_read(&md->io_lock); |
2479 | } | 2316 | } |
2480 | 2317 | ||
2481 | up_write(&md->io_lock); | 2318 | up_read(&md->io_lock); |
2482 | } | 2319 | } |
2483 | 2320 | ||
2484 | static void dm_queue_flush(struct mapped_device *md) | 2321 | static void dm_queue_flush(struct mapped_device *md) |
@@ -2488,73 +2325,6 @@ static void dm_queue_flush(struct mapped_device *md) | |||
2488 | queue_work(md->wq, &md->work); | 2325 | queue_work(md->wq, &md->work); |
2489 | } | 2326 | } |
2490 | 2327 | ||
2491 | static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr) | ||
2492 | { | ||
2493 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
2494 | |||
2495 | tio->info.target_request_nr = request_nr; | ||
2496 | } | ||
2497 | |||
2498 | /* Issue barrier requests to targets and wait for their completion. */ | ||
2499 | static int dm_rq_barrier(struct mapped_device *md) | ||
2500 | { | ||
2501 | int i, j; | ||
2502 | struct dm_table *map = dm_get_live_table(md); | ||
2503 | unsigned num_targets = dm_table_get_num_targets(map); | ||
2504 | struct dm_target *ti; | ||
2505 | struct request *clone; | ||
2506 | |||
2507 | md->barrier_error = 0; | ||
2508 | |||
2509 | for (i = 0; i < num_targets; i++) { | ||
2510 | ti = dm_table_get_target(map, i); | ||
2511 | for (j = 0; j < ti->num_flush_requests; j++) { | ||
2512 | clone = clone_rq(md->flush_request, md, GFP_NOIO); | ||
2513 | dm_rq_set_target_request_nr(clone, j); | ||
2514 | atomic_inc(&md->pending[rq_data_dir(clone)]); | ||
2515 | map_request(ti, clone, md); | ||
2516 | } | ||
2517 | } | ||
2518 | |||
2519 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2520 | dm_table_put(map); | ||
2521 | |||
2522 | return md->barrier_error; | ||
2523 | } | ||
2524 | |||
2525 | static void dm_rq_barrier_work(struct work_struct *work) | ||
2526 | { | ||
2527 | int error; | ||
2528 | struct mapped_device *md = container_of(work, struct mapped_device, | ||
2529 | barrier_work); | ||
2530 | struct request_queue *q = md->queue; | ||
2531 | struct request *rq; | ||
2532 | unsigned long flags; | ||
2533 | |||
2534 | /* | ||
2535 | * Hold the md reference here and leave it at the last part so that | ||
2536 | * the md can't be deleted by device opener when the barrier request | ||
2537 | * completes. | ||
2538 | */ | ||
2539 | dm_get(md); | ||
2540 | |||
2541 | error = dm_rq_barrier(md); | ||
2542 | |||
2543 | rq = md->flush_request; | ||
2544 | md->flush_request = NULL; | ||
2545 | |||
2546 | if (error == DM_ENDIO_REQUEUE) { | ||
2547 | spin_lock_irqsave(q->queue_lock, flags); | ||
2548 | blk_requeue_request(q, rq); | ||
2549 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2550 | } else | ||
2551 | blk_end_request_all(rq, error); | ||
2552 | |||
2553 | blk_run_queue(q); | ||
2554 | |||
2555 | dm_put(md); | ||
2556 | } | ||
2557 | |||
2558 | /* | 2328 | /* |
2559 | * Swap in a new table, returning the old one for the caller to destroy. | 2329 | * Swap in a new table, returning the old one for the caller to destroy. |
2560 | */ | 2330 | */ |
@@ -2677,23 +2447,17 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2677 | * | 2447 | * |
2678 | * To get all processes out of __split_and_process_bio in dm_request, | 2448 | * To get all processes out of __split_and_process_bio in dm_request, |
2679 | * we take the write lock. To prevent any process from reentering | 2449 | * we take the write lock. To prevent any process from reentering |
2680 | * __split_and_process_bio from dm_request, we set | 2450 | * __split_and_process_bio from dm_request and quiesce the thread |
2681 | * DMF_QUEUE_IO_TO_THREAD. | 2451 | * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call |
2682 | * | 2452 | * flush_workqueue(md->wq). |
2683 | * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND | ||
2684 | * and call flush_workqueue(md->wq). flush_workqueue will wait until | ||
2685 | * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any | ||
2686 | * further calls to __split_and_process_bio from dm_wq_work. | ||
2687 | */ | 2453 | */ |
2688 | down_write(&md->io_lock); | 2454 | down_write(&md->io_lock); |
2689 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | 2455 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
2690 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | ||
2691 | up_write(&md->io_lock); | 2456 | up_write(&md->io_lock); |
2692 | 2457 | ||
2693 | /* | 2458 | /* |
2694 | * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which | 2459 | * Stop md->queue before flushing md->wq in case request-based |
2695 | * can be kicked until md->queue is stopped. So stop md->queue before | 2460 | * dm defers requests to md->wq from md->queue. |
2696 | * flushing md->wq. | ||
2697 | */ | 2461 | */ |
2698 | if (dm_request_based(md)) | 2462 | if (dm_request_based(md)) |
2699 | stop_queue(md->queue); | 2463 | stop_queue(md->queue); |