about summary refs log tree commit diff stats
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-10-22 20:07:18 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-10-22 20:07:18 -0400
commit a2887097f25cd38cadfc11d10769e2b349fb5eca (patch)
tree cd4adcb305365d6ba9acd2c02d4eb9d0125c6f8d /drivers/md/dm.c
parent 8abfc6e7a45eb74e51904bbae676fae008b11366 (diff)
parent 005a1d15f5a6b2bb4ada80349513effbf22b4588 (diff)
Merge branch 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block: (46 commits)
  xen-blkfront: disable barrier/flush write support
  Added blk-lib.c and blk-barrier.c was renamed to blk-flush.c
  block: remove BLKDEV_IFL_WAIT
  aic7xxx_old: removed unused 'req' variable
  block: remove the BH_Eopnotsupp flag
  block: remove the BLKDEV_IFL_BARRIER flag
  block: remove the WRITE_BARRIER flag
  swap: do not send discards as barriers
  fat: do not send discards as barriers
  ext4: do not send discards as barriers
  jbd2: replace barriers with explicit flush / FUA usage
  jbd2: Modify ASYNC_COMMIT code to not rely on queue draining on barrier
  jbd: replace barriers with explicit flush / FUA usage
  nilfs2: replace barriers with explicit flush / FUA usage
  reiserfs: replace barriers with explicit flush / FUA usage
  gfs2: replace barriers with explicit flush / FUA usage
  btrfs: replace barriers with explicit flush / FUA usage
  xfs: replace barriers with explicit flush / FUA usage
  block: pass gfp_mask and flags to sb_issue_discard
  dm: convey that all flushes are processed as empty
  ...
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- drivers/md/dm.c 398
1 file changed, 81 insertions, 317 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7967eca5a2d5..7cb1352f7e7a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
110#define DMF_FREEING 3 110#define DMF_FREEING 3
111#define DMF_DELETING 4 111#define DMF_DELETING 4
112#define DMF_NOFLUSH_SUSPENDING 5 112#define DMF_NOFLUSH_SUSPENDING 5
113#define DMF_QUEUE_IO_TO_THREAD 6
114 113
115/* 114/*
116 * Work processed by per-device workqueue. 115 * Work processed by per-device workqueue.
@@ -144,24 +143,9 @@ struct mapped_device {
144 spinlock_t deferred_lock; 143 spinlock_t deferred_lock;
145 144
146 /* 145 /*
147 * An error from the barrier request currently being processed. 146 * Processing queue (flush)
148 */
149 int barrier_error;
150
151 /*
152 * Protect barrier_error from concurrent endio processing
153 * in request-based dm.
154 */
155 spinlock_t barrier_error_lock;
156
157 /*
158 * Processing queue (flush/barriers)
159 */ 147 */
160 struct workqueue_struct *wq; 148 struct workqueue_struct *wq;
161 struct work_struct barrier_work;
162
163 /* A pointer to the currently processing pre/post flush request */
164 struct request *flush_request;
165 149
166 /* 150 /*
167 * The current mapping. 151 * The current mapping.
@@ -200,8 +184,8 @@ struct mapped_device {
200 /* sysfs handle */ 184 /* sysfs handle */
201 struct kobject kobj; 185 struct kobject kobj;
202 186
203 /* zero-length barrier that will be cloned and submitted to targets */ 187 /* zero-length flush that will be cloned and submitted to targets */
204 struct bio barrier_bio; 188 struct bio flush_bio;
205}; 189};
206 190
207/* 191/*
@@ -512,7 +496,7 @@ static void end_io_acct(struct dm_io *io)
512 496
513 /* 497 /*
514 * After this is decremented the bio must not be touched if it is 498 * After this is decremented the bio must not be touched if it is
515 * a barrier. 499 * a flush.
516 */ 500 */
517 dm_disk(md)->part0.in_flight[rw] = pending = 501 dm_disk(md)->part0.in_flight[rw] = pending =
518 atomic_dec_return(&md->pending[rw]); 502 atomic_dec_return(&md->pending[rw]);
@@ -528,16 +512,12 @@ static void end_io_acct(struct dm_io *io)
528 */ 512 */
529static void queue_io(struct mapped_device *md, struct bio *bio) 513static void queue_io(struct mapped_device *md, struct bio *bio)
530{ 514{
531 down_write(&md->io_lock); 515 unsigned long flags;
532 516
533 spin_lock_irq(&md->deferred_lock); 517 spin_lock_irqsave(&md->deferred_lock, flags);
534 bio_list_add(&md->deferred, bio); 518 bio_list_add(&md->deferred, bio);
535 spin_unlock_irq(&md->deferred_lock); 519 spin_unlock_irqrestore(&md->deferred_lock, flags);
536 520 queue_work(md->wq, &md->work);
537 if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
538 queue_work(md->wq, &md->work);
539
540 up_write(&md->io_lock);
541} 521}
542 522
543/* 523/*
@@ -625,11 +605,9 @@ static void dec_pending(struct dm_io *io, int error)
625 * Target requested pushing back the I/O. 605 * Target requested pushing back the I/O.
626 */ 606 */
627 spin_lock_irqsave(&md->deferred_lock, flags); 607 spin_lock_irqsave(&md->deferred_lock, flags);
628 if (__noflush_suspending(md)) { 608 if (__noflush_suspending(md))
629 if (!(io->bio->bi_rw & REQ_HARDBARRIER)) 609 bio_list_add_head(&md->deferred, io->bio);
630 bio_list_add_head(&md->deferred, 610 else
631 io->bio);
632 } else
633 /* noflush suspend was interrupted. */ 611 /* noflush suspend was interrupted. */
634 io->error = -EIO; 612 io->error = -EIO;
635 spin_unlock_irqrestore(&md->deferred_lock, flags); 613 spin_unlock_irqrestore(&md->deferred_lock, flags);
@@ -637,32 +615,23 @@ static void dec_pending(struct dm_io *io, int error)
637 615
638 io_error = io->error; 616 io_error = io->error;
639 bio = io->bio; 617 bio = io->bio;
618 end_io_acct(io);
619 free_io(md, io);
620
621 if (io_error == DM_ENDIO_REQUEUE)
622 return;
640 623
641 if (bio->bi_rw & REQ_HARDBARRIER) { 624 if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
642 /* 625 /*
643 * There can be just one barrier request so we use 626 * Preflush done for flush with data, reissue
644 * a per-device variable for error reporting. 627 * without REQ_FLUSH.
645 * Note that you can't touch the bio after end_io_acct
646 *
647 * We ignore -EOPNOTSUPP for empty flush reported by
648 * underlying devices. We assume that if the device
649 * doesn't support empty barriers, it doesn't need
650 * cache flushing commands.
651 */ 628 */
652 if (!md->barrier_error && 629 bio->bi_rw &= ~REQ_FLUSH;
653 !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) 630 queue_io(md, bio);
654 md->barrier_error = io_error;
655 end_io_acct(io);
656 free_io(md, io);
657 } else { 631 } else {
658 end_io_acct(io); 632 /* done with normal IO or empty flush */
659 free_io(md, io); 633 trace_block_bio_complete(md->queue, bio);
660 634 bio_endio(bio, io_error);
661 if (io_error != DM_ENDIO_REQUEUE) {
662 trace_block_bio_complete(md->queue, bio);
663
664 bio_endio(bio, io_error);
665 }
666 } 635 }
667 } 636 }
668} 637}
@@ -755,23 +724,6 @@ static void end_clone_bio(struct bio *clone, int error)
755 blk_update_request(tio->orig, 0, nr_bytes); 724 blk_update_request(tio->orig, 0, nr_bytes);
756} 725}
757 726
758static void store_barrier_error(struct mapped_device *md, int error)
759{
760 unsigned long flags;
761
762 spin_lock_irqsave(&md->barrier_error_lock, flags);
763 /*
764 * Basically, the first error is taken, but:
765 * -EOPNOTSUPP supersedes any I/O error.
766 * Requeue request supersedes any I/O error but -EOPNOTSUPP.
767 */
768 if (!md->barrier_error || error == -EOPNOTSUPP ||
769 (md->barrier_error != -EOPNOTSUPP &&
770 error == DM_ENDIO_REQUEUE))
771 md->barrier_error = error;
772 spin_unlock_irqrestore(&md->barrier_error_lock, flags);
773}
774
775/* 727/*
776 * Don't touch any member of the md after calling this function because 728 * Don't touch any member of the md after calling this function because
777 * the md may be freed in dm_put() at the end of this function. 729 * the md may be freed in dm_put() at the end of this function.
@@ -809,13 +761,11 @@ static void free_rq_clone(struct request *clone)
809static void dm_end_request(struct request *clone, int error) 761static void dm_end_request(struct request *clone, int error)
810{ 762{
811 int rw = rq_data_dir(clone); 763 int rw = rq_data_dir(clone);
812 int run_queue = 1;
813 bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
814 struct dm_rq_target_io *tio = clone->end_io_data; 764 struct dm_rq_target_io *tio = clone->end_io_data;
815 struct mapped_device *md = tio->md; 765 struct mapped_device *md = tio->md;
816 struct request *rq = tio->orig; 766 struct request *rq = tio->orig;
817 767
818 if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) { 768 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
819 rq->errors = clone->errors; 769 rq->errors = clone->errors;
820 rq->resid_len = clone->resid_len; 770 rq->resid_len = clone->resid_len;
821 771
@@ -829,15 +779,8 @@ static void dm_end_request(struct request *clone, int error)
829 } 779 }
830 780
831 free_rq_clone(clone); 781 free_rq_clone(clone);
832 782 blk_end_request_all(rq, error);
833 if (unlikely(is_barrier)) { 783 rq_completed(md, rw, true);
834 if (unlikely(error))
835 store_barrier_error(md, error);
836 run_queue = 0;
837 } else
838 blk_end_request_all(rq, error);
839
840 rq_completed(md, rw, run_queue);
841} 784}
842 785
843static void dm_unprep_request(struct request *rq) 786static void dm_unprep_request(struct request *rq)
@@ -862,16 +805,6 @@ void dm_requeue_unmapped_request(struct request *clone)
862 struct request_queue *q = rq->q; 805 struct request_queue *q = rq->q;
863 unsigned long flags; 806 unsigned long flags;
864 807
865 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
866 /*
867 * Barrier clones share an original request.
868 * Leave it to dm_end_request(), which handles this special
869 * case.
870 */
871 dm_end_request(clone, DM_ENDIO_REQUEUE);
872 return;
873 }
874
875 dm_unprep_request(rq); 808 dm_unprep_request(rq);
876 809
877 spin_lock_irqsave(q->queue_lock, flags); 810 spin_lock_irqsave(q->queue_lock, flags);
@@ -961,19 +894,6 @@ static void dm_complete_request(struct request *clone, int error)
961 struct dm_rq_target_io *tio = clone->end_io_data; 894 struct dm_rq_target_io *tio = clone->end_io_data;
962 struct request *rq = tio->orig; 895 struct request *rq = tio->orig;
963 896
964 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
965 /*
966 * Barrier clones share an original request. So can't use
967 * softirq_done with the original.
968 * Pass the clone to dm_done() directly in this special case.
969 * It is safe (even if clone->q->queue_lock is held here)
970 * because there is no I/O dispatching during the completion
971 * of barrier clone.
972 */
973 dm_done(clone, error, true);
974 return;
975 }
976
977 tio->error = error; 897 tio->error = error;
978 rq->completion_data = clone; 898 rq->completion_data = clone;
979 blk_complete_request(rq); 899 blk_complete_request(rq);
@@ -990,17 +910,6 @@ void dm_kill_unmapped_request(struct request *clone, int error)
990 struct dm_rq_target_io *tio = clone->end_io_data; 910 struct dm_rq_target_io *tio = clone->end_io_data;
991 struct request *rq = tio->orig; 911 struct request *rq = tio->orig;
992 912
993 if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
994 /*
995 * Barrier clones share an original request.
996 * Leave it to dm_end_request(), which handles this special
997 * case.
998 */
999 BUG_ON(error > 0);
1000 dm_end_request(clone, error);
1001 return;
1002 }
1003
1004 rq->cmd_flags |= REQ_FAILED; 913 rq->cmd_flags |= REQ_FAILED;
1005 dm_complete_request(clone, error); 914 dm_complete_request(clone, error);
1006} 915}
@@ -1119,7 +1028,7 @@ static void dm_bio_destructor(struct bio *bio)
1119} 1028}
1120 1029
1121/* 1030/*
1122 * Creates a little bio that is just does part of a bvec. 1031 * Creates a little bio that just does part of a bvec.
1123 */ 1032 */
1124static struct bio *split_bvec(struct bio *bio, sector_t sector, 1033static struct bio *split_bvec(struct bio *bio, sector_t sector,
1125 unsigned short idx, unsigned int offset, 1034 unsigned short idx, unsigned int offset,
@@ -1134,7 +1043,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
1134 1043
1135 clone->bi_sector = sector; 1044 clone->bi_sector = sector;
1136 clone->bi_bdev = bio->bi_bdev; 1045 clone->bi_bdev = bio->bi_bdev;
1137 clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; 1046 clone->bi_rw = bio->bi_rw;
1138 clone->bi_vcnt = 1; 1047 clone->bi_vcnt = 1;
1139 clone->bi_size = to_bytes(len); 1048 clone->bi_size = to_bytes(len);
1140 clone->bi_io_vec->bv_offset = offset; 1049 clone->bi_io_vec->bv_offset = offset;
@@ -1161,7 +1070,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
1161 1070
1162 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); 1071 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
1163 __bio_clone(clone, bio); 1072 __bio_clone(clone, bio);
1164 clone->bi_rw &= ~REQ_HARDBARRIER;
1165 clone->bi_destructor = dm_bio_destructor; 1073 clone->bi_destructor = dm_bio_destructor;
1166 clone->bi_sector = sector; 1074 clone->bi_sector = sector;
1167 clone->bi_idx = idx; 1075 clone->bi_idx = idx;
@@ -1225,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
1225 __issue_target_request(ci, ti, request_nr, len); 1133 __issue_target_request(ci, ti, request_nr, len);
1226} 1134}
1227 1135
1228static int __clone_and_map_empty_barrier(struct clone_info *ci) 1136static int __clone_and_map_empty_flush(struct clone_info *ci)
1229{ 1137{
1230 unsigned target_nr = 0; 1138 unsigned target_nr = 0;
1231 struct dm_target *ti; 1139 struct dm_target *ti;
1232 1140
1141 BUG_ON(bio_has_data(ci->bio));
1233 while ((ti = dm_table_get_target(ci->map, target_nr++))) 1142 while ((ti = dm_table_get_target(ci->map, target_nr++)))
1234 __issue_target_requests(ci, ti, ti->num_flush_requests, 0); 1143 __issue_target_requests(ci, ti, ti->num_flush_requests, 0);
1235 1144
1236 ci->sector_count = 0;
1237
1238 return 0; 1145 return 0;
1239} 1146}
1240 1147
@@ -1289,9 +1196,6 @@ static int __clone_and_map(struct clone_info *ci)
1289 sector_t len = 0, max; 1196 sector_t len = 0, max;
1290 struct dm_target_io *tio; 1197 struct dm_target_io *tio;
1291 1198
1292 if (unlikely(bio_empty_barrier(bio)))
1293 return __clone_and_map_empty_barrier(ci);
1294
1295 if (unlikely(bio->bi_rw & REQ_DISCARD)) 1199 if (unlikely(bio->bi_rw & REQ_DISCARD))
1296 return __clone_and_map_discard(ci); 1200 return __clone_and_map_discard(ci);
1297 1201
@@ -1383,16 +1287,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1383 1287
1384 ci.map = dm_get_live_table(md); 1288 ci.map = dm_get_live_table(md);
1385 if (unlikely(!ci.map)) { 1289 if (unlikely(!ci.map)) {
1386 if (!(bio->bi_rw & REQ_HARDBARRIER)) 1290 bio_io_error(bio);
1387 bio_io_error(bio);
1388 else
1389 if (!md->barrier_error)
1390 md->barrier_error = -EIO;
1391 return; 1291 return;
1392 } 1292 }
1393 1293
1394 ci.md = md; 1294 ci.md = md;
1395 ci.bio = bio;
1396 ci.io = alloc_io(md); 1295 ci.io = alloc_io(md);
1397 ci.io->error = 0; 1296 ci.io->error = 0;
1398 atomic_set(&ci.io->io_count, 1); 1297 atomic_set(&ci.io->io_count, 1);
@@ -1400,14 +1299,20 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1400 ci.io->md = md; 1299 ci.io->md = md;
1401 spin_lock_init(&ci.io->endio_lock); 1300 spin_lock_init(&ci.io->endio_lock);
1402 ci.sector = bio->bi_sector; 1301 ci.sector = bio->bi_sector;
1403 ci.sector_count = bio_sectors(bio);
1404 if (unlikely(bio_empty_barrier(bio)))
1405 ci.sector_count = 1;
1406 ci.idx = bio->bi_idx; 1302 ci.idx = bio->bi_idx;
1407 1303
1408 start_io_acct(ci.io); 1304 start_io_acct(ci.io);
1409 while (ci.sector_count && !error) 1305 if (bio->bi_rw & REQ_FLUSH) {
1410 error = __clone_and_map(&ci); 1306 ci.bio = &ci.md->flush_bio;
1307 ci.sector_count = 0;
1308 error = __clone_and_map_empty_flush(&ci);
1309 /* dec_pending submits any data associated with flush */
1310 } else {
1311 ci.bio = bio;
1312 ci.sector_count = bio_sectors(bio);
1313 while (ci.sector_count && !error)
1314 error = __clone_and_map(&ci);
1315 }
1411 1316
1412 /* drop the extra reference count */ 1317 /* drop the extra reference count */
1413 dec_pending(ci.io, error); 1318 dec_pending(ci.io, error);
@@ -1491,22 +1396,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
1491 part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); 1396 part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
1492 part_stat_unlock(); 1397 part_stat_unlock();
1493 1398
1494 /* 1399 /* if we're suspended, we have to queue this io for later */
1495 * If we're suspended or the thread is processing barriers 1400 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
1496 * we have to queue this io for later.
1497 */
1498 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
1499 unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
1500 up_read(&md->io_lock); 1401 up_read(&md->io_lock);
1501 1402
1502 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && 1403 if (bio_rw(bio) != READA)
1503 bio_rw(bio) == READA) { 1404 queue_io(md, bio);
1405 else
1504 bio_io_error(bio); 1406 bio_io_error(bio);
1505 return 0;
1506 }
1507
1508 queue_io(md, bio);
1509
1510 return 0; 1407 return 0;
1511 } 1408 }
1512 1409
@@ -1537,14 +1434,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
1537 return _dm_request(q, bio); 1434 return _dm_request(q, bio);
1538} 1435}
1539 1436
1540static bool dm_rq_is_flush_request(struct request *rq)
1541{
1542 if (rq->cmd_flags & REQ_FLUSH)
1543 return true;
1544 else
1545 return false;
1546}
1547
1548void dm_dispatch_request(struct request *rq) 1437void dm_dispatch_request(struct request *rq)
1549{ 1438{
1550 int r; 1439 int r;
@@ -1592,22 +1481,15 @@ static int setup_clone(struct request *clone, struct request *rq,
1592{ 1481{
1593 int r; 1482 int r;
1594 1483
1595 if (dm_rq_is_flush_request(rq)) { 1484 r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1596 blk_rq_init(NULL, clone); 1485 dm_rq_bio_constructor, tio);
1597 clone->cmd_type = REQ_TYPE_FS; 1486 if (r)
1598 clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); 1487 return r;
1599 } else {
1600 r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1601 dm_rq_bio_constructor, tio);
1602 if (r)
1603 return r;
1604
1605 clone->cmd = rq->cmd;
1606 clone->cmd_len = rq->cmd_len;
1607 clone->sense = rq->sense;
1608 clone->buffer = rq->buffer;
1609 }
1610 1488
1489 clone->cmd = rq->cmd;
1490 clone->cmd_len = rq->cmd_len;
1491 clone->sense = rq->sense;
1492 clone->buffer = rq->buffer;
1611 clone->end_io = end_clone_request; 1493 clone->end_io = end_clone_request;
1612 clone->end_io_data = tio; 1494 clone->end_io_data = tio;
1613 1495
@@ -1648,9 +1530,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
1648 struct mapped_device *md = q->queuedata; 1530 struct mapped_device *md = q->queuedata;
1649 struct request *clone; 1531 struct request *clone;
1650 1532
1651 if (unlikely(dm_rq_is_flush_request(rq)))
1652 return BLKPREP_OK;
1653
1654 if (unlikely(rq->special)) { 1533 if (unlikely(rq->special)) {
1655 DMWARN("Already has something in rq->special."); 1534 DMWARN("Already has something in rq->special.");
1656 return BLKPREP_KILL; 1535 return BLKPREP_KILL;
@@ -1727,6 +1606,7 @@ static void dm_request_fn(struct request_queue *q)
1727 struct dm_table *map = dm_get_live_table(md); 1606 struct dm_table *map = dm_get_live_table(md);
1728 struct dm_target *ti; 1607 struct dm_target *ti;
1729 struct request *rq, *clone; 1608 struct request *rq, *clone;
1609 sector_t pos;
1730 1610
1731 /* 1611 /*
1732 * For suspend, check blk_queue_stopped() and increment 1612 * For suspend, check blk_queue_stopped() and increment
@@ -1739,15 +1619,14 @@ static void dm_request_fn(struct request_queue *q)
1739 if (!rq) 1619 if (!rq)
1740 goto plug_and_out; 1620 goto plug_and_out;
1741 1621
1742 if (unlikely(dm_rq_is_flush_request(rq))) { 1622 /* always use block 0 to find the target for flushes for now */
1743 BUG_ON(md->flush_request); 1623 pos = 0;
1744 md->flush_request = rq; 1624 if (!(rq->cmd_flags & REQ_FLUSH))
1745 blk_start_request(rq); 1625 pos = blk_rq_pos(rq);
1746 queue_work(md->wq, &md->barrier_work); 1626
1747 goto out; 1627 ti = dm_table_find_target(map, pos);
1748 } 1628 BUG_ON(!dm_target_is_valid(ti));
1749 1629
1750 ti = dm_table_find_target(map, blk_rq_pos(rq));
1751 if (ti->type->busy && ti->type->busy(ti)) 1630 if (ti->type->busy && ti->type->busy(ti))
1752 goto plug_and_out; 1631 goto plug_and_out;
1753 1632
@@ -1918,7 +1797,6 @@ out:
1918static const struct block_device_operations dm_blk_dops; 1797static const struct block_device_operations dm_blk_dops;
1919 1798
1920static void dm_wq_work(struct work_struct *work); 1799static void dm_wq_work(struct work_struct *work);
1921static void dm_rq_barrier_work(struct work_struct *work);
1922 1800
1923static void dm_init_md_queue(struct mapped_device *md) 1801static void dm_init_md_queue(struct mapped_device *md)
1924{ 1802{
@@ -1940,6 +1818,7 @@ static void dm_init_md_queue(struct mapped_device *md)
1940 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1818 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1941 md->queue->unplug_fn = dm_unplug_all; 1819 md->queue->unplug_fn = dm_unplug_all;
1942 blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1820 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
1821 blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
1943} 1822}
1944 1823
1945/* 1824/*
@@ -1972,7 +1851,6 @@ static struct mapped_device *alloc_dev(int minor)
1972 mutex_init(&md->suspend_lock); 1851 mutex_init(&md->suspend_lock);
1973 mutex_init(&md->type_lock); 1852 mutex_init(&md->type_lock);
1974 spin_lock_init(&md->deferred_lock); 1853 spin_lock_init(&md->deferred_lock);
1975 spin_lock_init(&md->barrier_error_lock);
1976 rwlock_init(&md->map_lock); 1854 rwlock_init(&md->map_lock);
1977 atomic_set(&md->holders, 1); 1855 atomic_set(&md->holders, 1);
1978 atomic_set(&md->open_count, 0); 1856 atomic_set(&md->open_count, 0);
@@ -1995,7 +1873,6 @@ static struct mapped_device *alloc_dev(int minor)
1995 atomic_set(&md->pending[1], 0); 1873 atomic_set(&md->pending[1], 0);
1996 init_waitqueue_head(&md->wait); 1874 init_waitqueue_head(&md->wait);
1997 INIT_WORK(&md->work, dm_wq_work); 1875 INIT_WORK(&md->work, dm_wq_work);
1998 INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
1999 init_waitqueue_head(&md->eventq); 1876 init_waitqueue_head(&md->eventq);
2000 1877
2001 md->disk->major = _major; 1878 md->disk->major = _major;
@@ -2015,6 +1892,10 @@ static struct mapped_device *alloc_dev(int minor)
2015 if (!md->bdev) 1892 if (!md->bdev)
2016 goto bad_bdev; 1893 goto bad_bdev;
2017 1894
1895 bio_init(&md->flush_bio);
1896 md->flush_bio.bi_bdev = md->bdev;
1897 md->flush_bio.bi_rw = WRITE_FLUSH;
1898
2018 /* Populate the mapping, nobody knows we exist yet */ 1899 /* Populate the mapping, nobody knows we exist yet */
2019 spin_lock(&_minor_lock); 1900 spin_lock(&_minor_lock);
2020 old_md = idr_replace(&_minor_idr, md, minor); 1901 old_md = idr_replace(&_minor_idr, md, minor);
@@ -2245,7 +2126,6 @@ static int dm_init_request_based_queue(struct mapped_device *md)
2245 blk_queue_softirq_done(md->queue, dm_softirq_done); 2126 blk_queue_softirq_done(md->queue, dm_softirq_done);
2246 blk_queue_prep_rq(md->queue, dm_prep_fn); 2127 blk_queue_prep_rq(md->queue, dm_prep_fn);
2247 blk_queue_lld_busy(md->queue, dm_lld_busy); 2128 blk_queue_lld_busy(md->queue, dm_lld_busy);
2248 blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
2249 2129
2250 elv_register_queue(md->queue); 2130 elv_register_queue(md->queue);
2251 2131
@@ -2406,43 +2286,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
2406 return r; 2286 return r;
2407} 2287}
2408 2288
2409static void dm_flush(struct mapped_device *md)
2410{
2411 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2412
2413 bio_init(&md->barrier_bio);
2414 md->barrier_bio.bi_bdev = md->bdev;
2415 md->barrier_bio.bi_rw = WRITE_BARRIER;
2416 __split_and_process_bio(md, &md->barrier_bio);
2417
2418 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2419}
2420
2421static void process_barrier(struct mapped_device *md, struct bio *bio)
2422{
2423 md->barrier_error = 0;
2424
2425 dm_flush(md);
2426
2427 if (!bio_empty_barrier(bio)) {
2428 __split_and_process_bio(md, bio);
2429 /*
2430 * If the request isn't supported, don't waste time with
2431 * the second flush.
2432 */
2433 if (md->barrier_error != -EOPNOTSUPP)
2434 dm_flush(md);
2435 }
2436
2437 if (md->barrier_error != DM_ENDIO_REQUEUE)
2438 bio_endio(bio, md->barrier_error);
2439 else {
2440 spin_lock_irq(&md->deferred_lock);
2441 bio_list_add_head(&md->deferred, bio);
2442 spin_unlock_irq(&md->deferred_lock);
2443 }
2444}
2445
2446/* 2289/*
2447 * Process the deferred bios 2290 * Process the deferred bios
2448 */ 2291 */
@@ -2452,33 +2295,27 @@ static void dm_wq_work(struct work_struct *work)
2452 work); 2295 work);
2453 struct bio *c; 2296 struct bio *c;
2454 2297
2455 down_write(&md->io_lock); 2298 down_read(&md->io_lock);
2456 2299
2457 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 2300 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
2458 spin_lock_irq(&md->deferred_lock); 2301 spin_lock_irq(&md->deferred_lock);
2459 c = bio_list_pop(&md->deferred); 2302 c = bio_list_pop(&md->deferred);
2460 spin_unlock_irq(&md->deferred_lock); 2303 spin_unlock_irq(&md->deferred_lock);
2461 2304
2462 if (!c) { 2305 if (!c)
2463 clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2464 break; 2306 break;
2465 }
2466 2307
2467 up_write(&md->io_lock); 2308 up_read(&md->io_lock);
2468 2309
2469 if (dm_request_based(md)) 2310 if (dm_request_based(md))
2470 generic_make_request(c); 2311 generic_make_request(c);
2471 else { 2312 else
2472 if (c->bi_rw & REQ_HARDBARRIER) 2313 __split_and_process_bio(md, c);
2473 process_barrier(md, c);
2474 else
2475 __split_and_process_bio(md, c);
2476 }
2477 2314
2478 down_write(&md->io_lock); 2315 down_read(&md->io_lock);
2479 } 2316 }
2480 2317
2481 up_write(&md->io_lock); 2318 up_read(&md->io_lock);
2482} 2319}
2483 2320
2484static void dm_queue_flush(struct mapped_device *md) 2321static void dm_queue_flush(struct mapped_device *md)
@@ -2488,73 +2325,6 @@ static void dm_queue_flush(struct mapped_device *md)
2488 queue_work(md->wq, &md->work); 2325 queue_work(md->wq, &md->work);
2489} 2326}
2490 2327
2491static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
2492{
2493 struct dm_rq_target_io *tio = clone->end_io_data;
2494
2495 tio->info.target_request_nr = request_nr;
2496}
2497
2498/* Issue barrier requests to targets and wait for their completion. */
2499static int dm_rq_barrier(struct mapped_device *md)
2500{
2501 int i, j;
2502 struct dm_table *map = dm_get_live_table(md);
2503 unsigned num_targets = dm_table_get_num_targets(map);
2504 struct dm_target *ti;
2505 struct request *clone;
2506
2507 md->barrier_error = 0;
2508
2509 for (i = 0; i < num_targets; i++) {
2510 ti = dm_table_get_target(map, i);
2511 for (j = 0; j < ti->num_flush_requests; j++) {
2512 clone = clone_rq(md->flush_request, md, GFP_NOIO);
2513 dm_rq_set_target_request_nr(clone, j);
2514 atomic_inc(&md->pending[rq_data_dir(clone)]);
2515 map_request(ti, clone, md);
2516 }
2517 }
2518
2519 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2520 dm_table_put(map);
2521
2522 return md->barrier_error;
2523}
2524
2525static void dm_rq_barrier_work(struct work_struct *work)
2526{
2527 int error;
2528 struct mapped_device *md = container_of(work, struct mapped_device,
2529 barrier_work);
2530 struct request_queue *q = md->queue;
2531 struct request *rq;
2532 unsigned long flags;
2533
2534 /*
2535 * Hold the md reference here and leave it at the last part so that
2536 * the md can't be deleted by device opener when the barrier request
2537 * completes.
2538 */
2539 dm_get(md);
2540
2541 error = dm_rq_barrier(md);
2542
2543 rq = md->flush_request;
2544 md->flush_request = NULL;
2545
2546 if (error == DM_ENDIO_REQUEUE) {
2547 spin_lock_irqsave(q->queue_lock, flags);
2548 blk_requeue_request(q, rq);
2549 spin_unlock_irqrestore(q->queue_lock, flags);
2550 } else
2551 blk_end_request_all(rq, error);
2552
2553 blk_run_queue(q);
2554
2555 dm_put(md);
2556}
2557
2558/* 2328/*
2559 * Swap in a new table, returning the old one for the caller to destroy. 2329 * Swap in a new table, returning the old one for the caller to destroy.
2560 */ 2330 */
@@ -2677,23 +2447,17 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2677 * 2447 *
2678 * To get all processes out of __split_and_process_bio in dm_request, 2448 * To get all processes out of __split_and_process_bio in dm_request,
2679 * we take the write lock. To prevent any process from reentering 2449 * we take the write lock. To prevent any process from reentering
2680 * __split_and_process_bio from dm_request, we set 2450 * __split_and_process_bio from dm_request and quiesce the thread
2681 * DMF_QUEUE_IO_TO_THREAD. 2451 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
2682 * 2452 * flush_workqueue(md->wq).
2683 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
2684 * and call flush_workqueue(md->wq). flush_workqueue will wait until
2685 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
2686 * further calls to __split_and_process_bio from dm_wq_work.
2687 */ 2453 */
2688 down_write(&md->io_lock); 2454 down_write(&md->io_lock);
2689 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2455 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2690 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2691 up_write(&md->io_lock); 2456 up_write(&md->io_lock);
2692 2457
2693 /* 2458 /*
2694 * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which 2459 * Stop md->queue before flushing md->wq in case request-based
2695 * can be kicked until md->queue is stopped. So stop md->queue before 2460 * dm defers requests to md->wq from md->queue.
2696 * flushing md->wq.
2697 */ 2461 */
2698 if (dm_request_based(md)) 2462 if (dm_request_based(md))
2699 stop_queue(md->queue); 2463 stop_queue(md->queue);