Diffstat (limited to 'drivers/md/dm.c')
 drivers/md/dm.c | 398 ++++++--------------------
 1 file changed, 81 insertions(+), 317 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7967eca5a2d5..7cb1352f7e7a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_FREEING 3
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
-#define DMF_QUEUE_IO_TO_THREAD 6
 
 /*
  * Work processed by per-device workqueue.
@@ -144,24 +143,9 @@ struct mapped_device {
         spinlock_t deferred_lock;
 
         /*
-         * An error from the barrier request currently being processed.
-         */
-        int barrier_error;
-
-        /*
-         * Protect barrier_error from concurrent endio processing
-         * in request-based dm.
-         */
-        spinlock_t barrier_error_lock;
-
-        /*
-         * Processing queue (flush/barriers)
+         * Processing queue (flush)
          */
         struct workqueue_struct *wq;
-        struct work_struct barrier_work;
-
-        /* A pointer to the currently processing pre/post flush request */
-        struct request *flush_request;
 
         /*
          * The current mapping.
@@ -200,8 +184,8 @@ struct mapped_device {
         /* sysfs handle */
         struct kobject kobj;
 
-        /* zero-length barrier that will be cloned and submitted to targets */
-        struct bio barrier_bio;
+        /* zero-length flush that will be cloned and submitted to targets */
+        struct bio flush_bio;
 };
 
 /*
@@ -512,7 +496,7 @@ static void end_io_acct(struct dm_io *io)
 
         /*
          * After this is decremented the bio must not be touched if it is
-         * a barrier.
+         * a flush.
          */
         dm_disk(md)->part0.in_flight[rw] = pending =
                 atomic_dec_return(&md->pending[rw]);
@@ -528,16 +512,12 @@ static void end_io_acct(struct dm_io *io)
  */
 static void queue_io(struct mapped_device *md, struct bio *bio)
 {
-        down_write(&md->io_lock);
+        unsigned long flags;
 
-        spin_lock_irq(&md->deferred_lock);
+        spin_lock_irqsave(&md->deferred_lock, flags);
         bio_list_add(&md->deferred, bio);
-        spin_unlock_irq(&md->deferred_lock);
-
-        if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
-                queue_work(md->wq, &md->work);
-
-        up_write(&md->io_lock);
+        spin_unlock_irqrestore(&md->deferred_lock, flags);
+        queue_work(md->wq, &md->work);
 }
 
 /*
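Pieced together from the new-side lines of the hunk above, queue_io() becomes a small, unconditional deferral helper. It now uses spin_lock_irqsave() instead of taking md->io_lock, presumably so it can also be called from the completion path in dec_pending() below, where interrupts may be disabled. A sketch of the resulting function (md->deferred, md->wq and md->work are the mapped_device members shown earlier in this diff):

static void queue_io(struct mapped_device *md, struct bio *bio)
{
        unsigned long flags;

        /* only the deferred-list lock is needed; md->io_lock is no longer taken */
        spin_lock_irqsave(&md->deferred_lock, flags);
        bio_list_add(&md->deferred, bio);
        spin_unlock_irqrestore(&md->deferred_lock, flags);

        /* kick the per-device worker unconditionally; the DMF_QUEUE_IO_TO_THREAD gate is gone */
        queue_work(md->wq, &md->work);
}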
@@ -625,11 +605,9 @@ static void dec_pending(struct dm_io *io, int error)
                  * Target requested pushing back the I/O.
                  */
                 spin_lock_irqsave(&md->deferred_lock, flags);
-                if (__noflush_suspending(md)) {
-                        if (!(io->bio->bi_rw & REQ_HARDBARRIER))
-                                bio_list_add_head(&md->deferred,
-                                                  io->bio);
-                } else
+                if (__noflush_suspending(md))
+                        bio_list_add_head(&md->deferred, io->bio);
+                else
                         /* noflush suspend was interrupted. */
                         io->error = -EIO;
                 spin_unlock_irqrestore(&md->deferred_lock, flags);
@@ -637,32 +615,23 @@ static void dec_pending(struct dm_io *io, int error)
 
                 io_error = io->error;
                 bio = io->bio;
+                end_io_acct(io);
+                free_io(md, io);
+
+                if (io_error == DM_ENDIO_REQUEUE)
+                        return;
 
-                if (bio->bi_rw & REQ_HARDBARRIER) {
+                if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
                         /*
-                         * There can be just one barrier request so we use
-                         * a per-device variable for error reporting.
-                         * Note that you can't touch the bio after end_io_acct
-                         *
-                         * We ignore -EOPNOTSUPP for empty flush reported by
-                         * underlying devices. We assume that if the device
-                         * doesn't support empty barriers, it doesn't need
-                         * cache flushing commands.
+                         * Preflush done for flush with data, reissue
+                         * without REQ_FLUSH.
                          */
-                        if (!md->barrier_error &&
-                            !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
-                                md->barrier_error = io_error;
-                        end_io_acct(io);
-                        free_io(md, io);
+                        bio->bi_rw &= ~REQ_FLUSH;
+                        queue_io(md, bio);
                 } else {
-                        end_io_acct(io);
-                        free_io(md, io);
-
-                        if (io_error != DM_ENDIO_REQUEUE) {
-                                trace_block_bio_complete(md->queue, bio);
-
-                                bio_endio(bio, io_error);
-                        }
+                        /* done with normal IO or empty flush */
+                        trace_block_bio_complete(md->queue, bio);
+                        bio_endio(bio, io_error);
                 }
         }
 }
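The hunk above is the core of the relaxed flush ordering for bio-based dm: a REQ_FLUSH bio that carries data is first completed as a preflush, and only then is its data part resubmitted. A sketch of the decision dec_pending() now makes once the last clone of an io completes (io_error, bio and md as in the function above):

        if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
                /* the preflush is done; strip REQ_FLUSH and requeue the data part */
                bio->bi_rw &= ~REQ_FLUSH;
                queue_io(md, bio);      /* picked up again by dm_wq_work() */
        } else {
                /* normal I/O or an empty flush is complete */
                trace_block_bio_complete(md->queue, bio);
                bio_endio(bio, io_error);
        }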
@@ -755,23 +724,6 @@ static void end_clone_bio(struct bio *clone, int error)
         blk_update_request(tio->orig, 0, nr_bytes);
 }
 
-static void store_barrier_error(struct mapped_device *md, int error)
-{
-        unsigned long flags;
-
-        spin_lock_irqsave(&md->barrier_error_lock, flags);
-        /*
-         * Basically, the first error is taken, but:
-         *   -EOPNOTSUPP supersedes any I/O error.
-         *   Requeue request supersedes any I/O error but -EOPNOTSUPP.
-         */
-        if (!md->barrier_error || error == -EOPNOTSUPP ||
-            (md->barrier_error != -EOPNOTSUPP &&
-             error == DM_ENDIO_REQUEUE))
-                md->barrier_error = error;
-        spin_unlock_irqrestore(&md->barrier_error_lock, flags);
-}
-
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
@@ -809,13 +761,11 @@ static void free_rq_clone(struct request *clone)
 static void dm_end_request(struct request *clone, int error)
 {
         int rw = rq_data_dir(clone);
-        int run_queue = 1;
-        bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
         struct dm_rq_target_io *tio = clone->end_io_data;
         struct mapped_device *md = tio->md;
         struct request *rq = tio->orig;
 
-        if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
+        if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                 rq->errors = clone->errors;
                 rq->resid_len = clone->resid_len;
 
@@ -829,15 +779,8 @@ static void dm_end_request(struct request *clone, int error)
         }
 
         free_rq_clone(clone);
-
-        if (unlikely(is_barrier)) {
-                if (unlikely(error))
-                        store_barrier_error(md, error);
-                run_queue = 0;
-        } else
-                blk_end_request_all(rq, error);
-
-        rq_completed(md, rw, run_queue);
+        blk_end_request_all(rq, error);
+        rq_completed(md, rw, true);
 }
 
 static void dm_unprep_request(struct request *rq)
@@ -862,16 +805,6 @@ void dm_requeue_unmapped_request(struct request *clone)
         struct request_queue *q = rq->q;
         unsigned long flags;
 
-        if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-                /*
-                 * Barrier clones share an original request.
-                 * Leave it to dm_end_request(), which handles this special
-                 * case.
-                 */
-                dm_end_request(clone, DM_ENDIO_REQUEUE);
-                return;
-        }
-
         dm_unprep_request(rq);
 
         spin_lock_irqsave(q->queue_lock, flags);
@@ -961,19 +894,6 @@ static void dm_complete_request(struct request *clone, int error)
         struct dm_rq_target_io *tio = clone->end_io_data;
         struct request *rq = tio->orig;
 
-        if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-                /*
-                 * Barrier clones share an original request.  So can't use
-                 * softirq_done with the original.
-                 * Pass the clone to dm_done() directly in this special case.
-                 * It is safe (even if clone->q->queue_lock is held here)
-                 * because there is no I/O dispatching during the completion
-                 * of barrier clone.
-                 */
-                dm_done(clone, error, true);
-                return;
-        }
-
         tio->error = error;
         rq->completion_data = clone;
         blk_complete_request(rq);
@@ -990,17 +910,6 @@ void dm_kill_unmapped_request(struct request *clone, int error)
         struct dm_rq_target_io *tio = clone->end_io_data;
         struct request *rq = tio->orig;
 
-        if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-                /*
-                 * Barrier clones share an original request.
-                 * Leave it to dm_end_request(), which handles this special
-                 * case.
-                 */
-                BUG_ON(error > 0);
-                dm_end_request(clone, error);
-                return;
-        }
-
         rq->cmd_flags |= REQ_FAILED;
         dm_complete_request(clone, error);
 }
@@ -1119,7 +1028,7 @@ static void dm_bio_destructor(struct bio *bio)
 }
 
 /*
- * Creates a little bio that is just does part of a bvec.
+ * Creates a little bio that just does part of a bvec.
  */
 static struct bio *split_bvec(struct bio *bio, sector_t sector,
                               unsigned short idx, unsigned int offset,
@@ -1134,7 +1043,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 
         clone->bi_sector = sector;
         clone->bi_bdev = bio->bi_bdev;
-        clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
+        clone->bi_rw = bio->bi_rw;
         clone->bi_vcnt = 1;
         clone->bi_size = to_bytes(len);
         clone->bi_io_vec->bv_offset = offset;
@@ -1161,7 +1070,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 
         clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
         __bio_clone(clone, bio);
-        clone->bi_rw &= ~REQ_HARDBARRIER;
         clone->bi_destructor = dm_bio_destructor;
         clone->bi_sector = sector;
         clone->bi_idx = idx;
@@ -1225,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
                 __issue_target_request(ci, ti, request_nr, len);
 }
 
-static int __clone_and_map_empty_barrier(struct clone_info *ci)
+static int __clone_and_map_empty_flush(struct clone_info *ci)
 {
         unsigned target_nr = 0;
         struct dm_target *ti;
 
+        BUG_ON(bio_has_data(ci->bio));
         while ((ti = dm_table_get_target(ci->map, target_nr++)))
                 __issue_target_requests(ci, ti, ti->num_flush_requests, 0);
 
-        ci->sector_count = 0;
-
         return 0;
 }
 
@@ -1289,9 +1196,6 @@ static int __clone_and_map(struct clone_info *ci)
         sector_t len = 0, max;
         struct dm_target_io *tio;
 
-        if (unlikely(bio_empty_barrier(bio)))
-                return __clone_and_map_empty_barrier(ci);
-
         if (unlikely(bio->bi_rw & REQ_DISCARD))
                 return __clone_and_map_discard(ci);
 
@@ -1383,16 +1287,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
         ci.map = dm_get_live_table(md);
         if (unlikely(!ci.map)) {
-                if (!(bio->bi_rw & REQ_HARDBARRIER))
-                        bio_io_error(bio);
-                else
-                        if (!md->barrier_error)
-                                md->barrier_error = -EIO;
+                bio_io_error(bio);
                 return;
         }
 
         ci.md = md;
-        ci.bio = bio;
         ci.io = alloc_io(md);
         ci.io->error = 0;
         atomic_set(&ci.io->io_count, 1);
@@ -1400,14 +1299,20 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
         ci.io->md = md;
         spin_lock_init(&ci.io->endio_lock);
         ci.sector = bio->bi_sector;
-        ci.sector_count = bio_sectors(bio);
-        if (unlikely(bio_empty_barrier(bio)))
-                ci.sector_count = 1;
         ci.idx = bio->bi_idx;
 
         start_io_acct(ci.io);
-        while (ci.sector_count && !error)
-                error = __clone_and_map(&ci);
+        if (bio->bi_rw & REQ_FLUSH) {
+                ci.bio = &ci.md->flush_bio;
+                ci.sector_count = 0;
+                error = __clone_and_map_empty_flush(&ci);
+                /* dec_pending submits any data associated with flush */
+        } else {
+                ci.bio = bio;
+                ci.sector_count = bio_sectors(bio);
+                while (ci.sector_count && !error)
+                        error = __clone_and_map(&ci);
+        }
 
         /* drop the extra reference count */
         dec_pending(ci.io, error);
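With this hunk, __split_and_process_bio() no longer clones a flush bio itself. For REQ_FLUSH it uses the preallocated zero-length md->flush_bio as the clone source and issues one empty flush per target (ti->num_flush_requests of them, via __clone_and_map_empty_flush()); any data carried by the original bio is resubmitted later from dec_pending(), as the in-line comment notes. The resulting dispatch logic, read from the new side of the hunk:

        start_io_acct(ci.io);
        if (bio->bi_rw & REQ_FLUSH) {
                /* clone empty flushes from md->flush_bio, one set per target */
                ci.bio = &ci.md->flush_bio;
                ci.sector_count = 0;
                error = __clone_and_map_empty_flush(&ci);
                /* dec_pending() resubmits any data carried by the original bio */
        } else {
                ci.bio = bio;
                ci.sector_count = bio_sectors(bio);
                while (ci.sector_count && !error)
                        error = __clone_and_map(&ci);
        }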
@@ -1491,22 +1396,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
         part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
         part_stat_unlock();
 
-        /*
-         * If we're suspended or the thread is processing barriers
-         * we have to queue this io for later.
-         */
-        if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-            unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
+        /* if we're suspended, we have to queue this io for later */
+        if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
                 up_read(&md->io_lock);
 
-                if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
-                    bio_rw(bio) == READA) {
+                if (bio_rw(bio) != READA)
+                        queue_io(md, bio);
+                else
                         bio_io_error(bio);
-                        return 0;
-                }
-
-                queue_io(md, bio);
-
                 return 0;
         }
 
@@ -1537,14 +1434,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
         return _dm_request(q, bio);
 }
 
-static bool dm_rq_is_flush_request(struct request *rq)
-{
-        if (rq->cmd_flags & REQ_FLUSH)
-                return true;
-        else
-                return false;
-}
-
 void dm_dispatch_request(struct request *rq)
 {
         int r;
@@ -1592,22 +1481,15 @@ static int setup_clone(struct request *clone, struct request *rq,
 {
         int r;
 
-        if (dm_rq_is_flush_request(rq)) {
-                blk_rq_init(NULL, clone);
-                clone->cmd_type = REQ_TYPE_FS;
-                clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
-        } else {
-                r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
-                                      dm_rq_bio_constructor, tio);
-                if (r)
-                        return r;
-
-                clone->cmd = rq->cmd;
-                clone->cmd_len = rq->cmd_len;
-                clone->sense = rq->sense;
-                clone->buffer = rq->buffer;
-        }
+        r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+                              dm_rq_bio_constructor, tio);
+        if (r)
+                return r;
 
+        clone->cmd = rq->cmd;
+        clone->cmd_len = rq->cmd_len;
+        clone->sense = rq->sense;
+        clone->buffer = rq->buffer;
         clone->end_io = end_clone_request;
         clone->end_io_data = tio;
 
@@ -1648,9 +1530,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
         struct mapped_device *md = q->queuedata;
         struct request *clone;
 
-        if (unlikely(dm_rq_is_flush_request(rq)))
-                return BLKPREP_OK;
-
         if (unlikely(rq->special)) {
                 DMWARN("Already has something in rq->special.");
                 return BLKPREP_KILL;
@@ -1727,6 +1606,7 @@ static void dm_request_fn(struct request_queue *q)
         struct dm_table *map = dm_get_live_table(md);
         struct dm_target *ti;
         struct request *rq, *clone;
+        sector_t pos;
 
         /*
          * For suspend, check blk_queue_stopped() and increment
@@ -1739,15 +1619,14 @@ static void dm_request_fn(struct request_queue *q)
                 if (!rq)
                         goto plug_and_out;
 
-                if (unlikely(dm_rq_is_flush_request(rq))) {
-                        BUG_ON(md->flush_request);
-                        md->flush_request = rq;
-                        blk_start_request(rq);
-                        queue_work(md->wq, &md->barrier_work);
-                        goto out;
-                }
+                /* always use block 0 to find the target for flushes for now */
+                pos = 0;
+                if (!(rq->cmd_flags & REQ_FLUSH))
+                        pos = blk_rq_pos(rq);
+
+                ti = dm_table_find_target(map, pos);
+                BUG_ON(!dm_target_is_valid(ti));
 
-                ti = dm_table_find_target(map, blk_rq_pos(rq));
                 if (ti->type->busy && ti->type->busy(ti))
                         goto plug_and_out;
 
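Request-based dm stops diverting flushes to a separate barrier work item here. A REQ_FLUSH request is now cloned and mapped like any other request; the only special handling left is that the target is looked up at sector 0, since a flush carries no sector of its own. The new lookup, as it reads after this hunk:

        /* always use block 0 to find the target for flushes for now */
        pos = 0;
        if (!(rq->cmd_flags & REQ_FLUSH))
                pos = blk_rq_pos(rq);

        ti = dm_table_find_target(map, pos);
        BUG_ON(!dm_target_is_valid(ti));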
@@ -1918,7 +1797,6 @@ out:
 static const struct block_device_operations dm_blk_dops;
 
 static void dm_wq_work(struct work_struct *work);
-static void dm_rq_barrier_work(struct work_struct *work);
 
 static void dm_init_md_queue(struct mapped_device *md)
 {
@@ -1940,6 +1818,7 @@ static void dm_init_md_queue(struct mapped_device *md)
         blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
         md->queue->unplug_fn = dm_unplug_all;
         blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+        blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }
 
 /*
@@ -1972,7 +1851,6 @@ static struct mapped_device *alloc_dev(int minor)
         mutex_init(&md->suspend_lock);
         mutex_init(&md->type_lock);
         spin_lock_init(&md->deferred_lock);
-        spin_lock_init(&md->barrier_error_lock);
         rwlock_init(&md->map_lock);
         atomic_set(&md->holders, 1);
         atomic_set(&md->open_count, 0);
@@ -1995,7 +1873,6 @@ static struct mapped_device *alloc_dev(int minor)
         atomic_set(&md->pending[1], 0);
         init_waitqueue_head(&md->wait);
         INIT_WORK(&md->work, dm_wq_work);
-        INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
         init_waitqueue_head(&md->eventq);
 
         md->disk->major = _major;
@@ -2015,6 +1892,10 @@ static struct mapped_device *alloc_dev(int minor)
         if (!md->bdev)
                 goto bad_bdev;
 
+        bio_init(&md->flush_bio);
+        md->flush_bio.bi_bdev = md->bdev;
+        md->flush_bio.bi_rw = WRITE_FLUSH;
+
         /* Populate the mapping, nobody knows we exist yet */
         spin_lock(&_minor_lock);
         old_md = idr_replace(&_minor_idr, md, minor);
@@ -2245,7 +2126,6 @@ static int dm_init_request_based_queue(struct mapped_device *md)
         blk_queue_softirq_done(md->queue, dm_softirq_done);
         blk_queue_prep_rq(md->queue, dm_prep_fn);
         blk_queue_lld_busy(md->queue, dm_lld_busy);
-        blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
 
         elv_register_queue(md->queue);
 
@@ -2406,43 +2286,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
         return r;
 }
 
-static void dm_flush(struct mapped_device *md)
-{
-        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-
-        bio_init(&md->barrier_bio);
-        md->barrier_bio.bi_bdev = md->bdev;
-        md->barrier_bio.bi_rw = WRITE_BARRIER;
-        __split_and_process_bio(md, &md->barrier_bio);
-
-        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-}
-
-static void process_barrier(struct mapped_device *md, struct bio *bio)
-{
-        md->barrier_error = 0;
-
-        dm_flush(md);
-
-        if (!bio_empty_barrier(bio)) {
-                __split_and_process_bio(md, bio);
-                /*
-                 * If the request isn't supported, don't waste time with
-                 * the second flush.
-                 */
-                if (md->barrier_error != -EOPNOTSUPP)
-                        dm_flush(md);
-        }
-
-        if (md->barrier_error != DM_ENDIO_REQUEUE)
-                bio_endio(bio, md->barrier_error);
-        else {
-                spin_lock_irq(&md->deferred_lock);
-                bio_list_add_head(&md->deferred, bio);
-                spin_unlock_irq(&md->deferred_lock);
-        }
-}
-
 /*
  * Process the deferred bios
  */
@@ -2452,33 +2295,27 @@ static void dm_wq_work(struct work_struct *work)
                                                 work);
         struct bio *c;
 
-        down_write(&md->io_lock);
+        down_read(&md->io_lock);
 
         while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
                 spin_lock_irq(&md->deferred_lock);
                 c = bio_list_pop(&md->deferred);
                 spin_unlock_irq(&md->deferred_lock);
 
-                if (!c) {
-                        clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
+                if (!c)
                         break;
-                }
 
-                up_write(&md->io_lock);
+                up_read(&md->io_lock);
 
                 if (dm_request_based(md))
                         generic_make_request(c);
-                else {
-                        if (c->bi_rw & REQ_HARDBARRIER)
-                                process_barrier(md, c);
-                        else
-                                __split_and_process_bio(md, c);
-                }
+                else
+                        __split_and_process_bio(md, c);
 
-                down_write(&md->io_lock);
+                down_read(&md->io_lock);
         }
 
-        up_write(&md->io_lock);
+        up_read(&md->io_lock);
 }
 
 static void dm_queue_flush(struct mapped_device *md)
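With barrier processing gone from the worker, dm_wq_work() only needs md->io_lock for read while draining md->deferred; the suspend path remains the writer that fences new I/O. The resulting loop, assembled from the new side of the hunk (comments added here for orientation):

        down_read(&md->io_lock);

        while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
                spin_lock_irq(&md->deferred_lock);
                c = bio_list_pop(&md->deferred);
                spin_unlock_irq(&md->deferred_lock);

                if (!c)
                        break;

                /* submit without holding io_lock */
                up_read(&md->io_lock);

                if (dm_request_based(md))
                        generic_make_request(c);
                else
                        __split_and_process_bio(md, c);

                down_read(&md->io_lock);
        }

        up_read(&md->io_lock);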
@@ -2488,73 +2325,6 @@ static void dm_queue_flush(struct mapped_device *md)
         queue_work(md->wq, &md->work);
 }
 
-static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
-{
-        struct dm_rq_target_io *tio = clone->end_io_data;
-
-        tio->info.target_request_nr = request_nr;
-}
-
-/* Issue barrier requests to targets and wait for their completion. */
-static int dm_rq_barrier(struct mapped_device *md)
-{
-        int i, j;
-        struct dm_table *map = dm_get_live_table(md);
-        unsigned num_targets = dm_table_get_num_targets(map);
-        struct dm_target *ti;
-        struct request *clone;
-
-        md->barrier_error = 0;
-
-        for (i = 0; i < num_targets; i++) {
-                ti = dm_table_get_target(map, i);
-                for (j = 0; j < ti->num_flush_requests; j++) {
-                        clone = clone_rq(md->flush_request, md, GFP_NOIO);
-                        dm_rq_set_target_request_nr(clone, j);
-                        atomic_inc(&md->pending[rq_data_dir(clone)]);
-                        map_request(ti, clone, md);
-                }
-        }
-
-        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-        dm_table_put(map);
-
-        return md->barrier_error;
-}
-
-static void dm_rq_barrier_work(struct work_struct *work)
-{
-        int error;
-        struct mapped_device *md = container_of(work, struct mapped_device,
-                                                barrier_work);
-        struct request_queue *q = md->queue;
-        struct request *rq;
-        unsigned long flags;
-
-        /*
-         * Hold the md reference here and leave it at the last part so that
-         * the md can't be deleted by device opener when the barrier request
-         * completes.
-         */
-        dm_get(md);
-
-        error = dm_rq_barrier(md);
-
-        rq = md->flush_request;
-        md->flush_request = NULL;
-
-        if (error == DM_ENDIO_REQUEUE) {
-                spin_lock_irqsave(q->queue_lock, flags);
-                blk_requeue_request(q, rq);
-                spin_unlock_irqrestore(q->queue_lock, flags);
-        } else
-                blk_end_request_all(rq, error);
-
-        blk_run_queue(q);
-
-        dm_put(md);
-}
-
 /*
  * Swap in a new table, returning the old one for the caller to destroy.
  */
@@ -2677,23 +2447,17 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
          *
          * To get all processes out of __split_and_process_bio in dm_request,
          * we take the write lock. To prevent any process from reentering
-         * __split_and_process_bio from dm_request, we set
-         * DMF_QUEUE_IO_TO_THREAD.
-         *
-         * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
-         * and call flush_workqueue(md->wq). flush_workqueue will wait until
-         * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
-         * further calls to __split_and_process_bio from dm_wq_work.
+         * __split_and_process_bio from dm_request and quiesce the thread
+         * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call
+         * flush_workqueue(md->wq).
          */
         down_write(&md->io_lock);
         set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
-        set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
         up_write(&md->io_lock);
 
         /*
-         * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
-         * can be kicked until md->queue is stopped. So stop md->queue before
-         * flushing md->wq.
+         * Stop md->queue before flushing md->wq in case request-based
+         * dm defers requests to md->wq from md->queue.
          */
         if (dm_request_based(md))
                 stop_queue(md->queue);
