path: root/drivers/md
author		Tejun Heo <tj@kernel.org>	2010-09-08 12:07:00 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-09-10 06:35:38 -0400
commit		29e4013de7ad950280e4b220894986866697d419 (patch)
tree		302e99d146940d043696f3e53b3814e65f99b269 /drivers/md
parent		d87f4c14f27dc82d215108d8392a7d26687148a1 (diff)
dm: implement REQ_FLUSH/FUA support for request-based dm
This patch converts request-based dm to support the new REQ_FLUSH/FUA.

The original request-based flush implementation depended on the request_queue blocking other requests while a barrier sequence was in progress, which is no longer true for the new REQ_FLUSH/FUA.

In general, request-based dm doesn't have infrastructure for cloning one source request to multiple targets, but the original flush implementation had a special, mostly independent path which could issue flushes to multiple targets and sequence them. However, that capability isn't currently in use and adds a lot of complexity. Moreover, it's unlikely to be useful in its current form, as it doesn't make sense to be able to send out flushes to multiple targets when write requests can't be.

This patch rips out the special flush code path and handles REQ_FLUSH/FUA requests the same way as other requests. The only special treatment is that REQ_FLUSH requests use block address 0 when finding the target, which is enough for now.

* Added BUG_ON(!dm_target_is_valid(ti)) in dm_request_fn() as suggested by Mike Snitzer

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Tested-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
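For readers skimming the patch, the target-selection rule described above boils down to a few lines. The helper below is a hypothetical, simplified paraphrase of the new dm_request_fn() hunk later in this diff; the name flush_aware_find_target and the standalone-helper framing are illustrative only and do not appear in the patch itself.

/* Illustrative sketch only -- condensed from the dm_request_fn() hunk below. */
static struct dm_target *flush_aware_find_target(struct dm_table *map,
						 struct request *rq)
{
	/* REQ_FLUSH carries no meaningful sector; route it via block 0 for now. */
	sector_t pos = (rq->cmd_flags & REQ_FLUSH) ? 0 : blk_rq_pos(rq);
	struct dm_target *ti = dm_table_find_target(map, pos);

	/* A live table must cover every sector (BUG_ON suggested by Mike Snitzer). */
	BUG_ON(!dm_target_is_valid(ti));
	return ti;
}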
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/dm.c	206
1 file changed, 22 insertions, 184 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 32e6622767ad..65114e4d9f65 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -149,20 +149,9 @@ struct mapped_device {
 	int flush_error;
 
 	/*
-	 * Protect barrier_error from concurrent endio processing
-	 * in request-based dm.
-	 */
-	spinlock_t barrier_error_lock;
-	int barrier_error;
-
-	/*
-	 * Processing queue (flush/barriers)
+	 * Processing queue (flush)
 	 */
 	struct workqueue_struct *wq;
-	struct work_struct barrier_work;
-
-	/* A pointer to the currently processing pre/post flush request */
-	struct request *flush_request;
 
 	/*
 	 * The current mapping.
@@ -750,23 +739,6 @@ static void end_clone_bio(struct bio *clone, int error)
 	blk_update_request(tio->orig, 0, nr_bytes);
 }
 
-static void store_barrier_error(struct mapped_device *md, int error)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&md->barrier_error_lock, flags);
-	/*
-	 * Basically, the first error is taken, but:
-	 *   -EOPNOTSUPP supersedes any I/O error.
-	 *   Requeue request supersedes any I/O error but -EOPNOTSUPP.
-	 */
-	if (!md->barrier_error || error == -EOPNOTSUPP ||
-	    (md->barrier_error != -EOPNOTSUPP &&
-	     error == DM_ENDIO_REQUEUE))
-		md->barrier_error = error;
-	spin_unlock_irqrestore(&md->barrier_error_lock, flags);
-}
-
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
@@ -804,13 +776,11 @@ static void free_rq_clone(struct request *clone)
 static void dm_end_request(struct request *clone, int error)
 {
 	int rw = rq_data_dir(clone);
-	int run_queue = 1;
-	bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 
-	if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		rq->errors = clone->errors;
 		rq->resid_len = clone->resid_len;
 
@@ -824,15 +794,8 @@ static void dm_end_request(struct request *clone, int error)
 	}
 
 	free_rq_clone(clone);
-
-	if (unlikely(is_barrier)) {
-		if (unlikely(error))
-			store_barrier_error(md, error);
-		run_queue = 0;
-	} else
-		blk_end_request_all(rq, error);
-
-	rq_completed(md, rw, run_queue);
+	blk_end_request_all(rq, error);
+	rq_completed(md, rw, true);
 }
 
 static void dm_unprep_request(struct request *rq)
@@ -857,16 +820,6 @@ void dm_requeue_unmapped_request(struct request *clone)
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
-	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-		/*
-		 * Barrier clones share an original request.
-		 * Leave it to dm_end_request(), which handles this special
-		 * case.
-		 */
-		dm_end_request(clone, DM_ENDIO_REQUEUE);
-		return;
-	}
-
 	dm_unprep_request(rq);
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -956,19 +909,6 @@ static void dm_complete_request(struct request *clone, int error)
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct request *rq = tio->orig;
 
-	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-		/*
-		 * Barrier clones share an original request.  So can't use
-		 * softirq_done with the original.
-		 * Pass the clone to dm_done() directly in this special case.
-		 * It is safe (even if clone->q->queue_lock is held here)
-		 * because there is no I/O dispatching during the completion
-		 * of barrier clone.
-		 */
-		dm_done(clone, error, true);
-		return;
-	}
-
 	tio->error = error;
 	rq->completion_data = clone;
 	blk_complete_request(rq);
@@ -985,17 +925,6 @@ void dm_kill_unmapped_request(struct request *clone, int error)
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct request *rq = tio->orig;
 
-	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-		/*
-		 * Barrier clones share an original request.
-		 * Leave it to dm_end_request(), which handles this special
-		 * case.
-		 */
-		BUG_ON(error > 0);
-		dm_end_request(clone, error);
-		return;
-	}
-
 	rq->cmd_flags |= REQ_FAILED;
 	dm_complete_request(clone, error);
 }
@@ -1536,14 +1465,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	return _dm_request(q, bio);
 }
 
-static bool dm_rq_is_flush_request(struct request *rq)
-{
-	if (rq->cmd_flags & REQ_FLUSH)
-		return true;
-	else
-		return false;
-}
-
 void dm_dispatch_request(struct request *rq)
 {
 	int r;
@@ -1591,22 +1512,15 @@ static int setup_clone(struct request *clone, struct request *rq,
 {
 	int r;
 
-	if (dm_rq_is_flush_request(rq)) {
-		blk_rq_init(NULL, clone);
-		clone->cmd_type = REQ_TYPE_FS;
-		clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
-	} else {
-		r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
-				      dm_rq_bio_constructor, tio);
-		if (r)
-			return r;
-
-		clone->cmd = rq->cmd;
-		clone->cmd_len = rq->cmd_len;
-		clone->sense = rq->sense;
-		clone->buffer = rq->buffer;
-	}
+	r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+			      dm_rq_bio_constructor, tio);
+	if (r)
+		return r;
 
+	clone->cmd = rq->cmd;
+	clone->cmd_len = rq->cmd_len;
+	clone->sense = rq->sense;
+	clone->buffer = rq->buffer;
 	clone->end_io = end_clone_request;
 	clone->end_io_data = tio;
 
@@ -1647,9 +1561,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
 	struct mapped_device *md = q->queuedata;
 	struct request *clone;
 
-	if (unlikely(dm_rq_is_flush_request(rq)))
-		return BLKPREP_OK;
-
 	if (unlikely(rq->special)) {
 		DMWARN("Already has something in rq->special.");
 		return BLKPREP_KILL;
@@ -1726,6 +1637,7 @@ static void dm_request_fn(struct request_queue *q)
 	struct dm_table *map = dm_get_live_table(md);
 	struct dm_target *ti;
 	struct request *rq, *clone;
+	sector_t pos;
 
 	/*
 	 * For suspend, check blk_queue_stopped() and increment
@@ -1738,15 +1650,14 @@ static void dm_request_fn(struct request_queue *q)
 		if (!rq)
 			goto plug_and_out;
 
-		if (unlikely(dm_rq_is_flush_request(rq))) {
-			BUG_ON(md->flush_request);
-			md->flush_request = rq;
-			blk_start_request(rq);
-			queue_work(md->wq, &md->barrier_work);
-			goto out;
-		}
+		/* always use block 0 to find the target for flushes for now */
+		pos = 0;
+		if (!(rq->cmd_flags & REQ_FLUSH))
+			pos = blk_rq_pos(rq);
+
+		ti = dm_table_find_target(map, pos);
+		BUG_ON(!dm_target_is_valid(ti));
 
-		ti = dm_table_find_target(map, blk_rq_pos(rq));
 		if (ti->type->busy && ti->type->busy(ti))
 			goto plug_and_out;
 
@@ -1917,7 +1828,6 @@ out:
 static const struct block_device_operations dm_blk_dops;
 
 static void dm_wq_work(struct work_struct *work);
-static void dm_rq_barrier_work(struct work_struct *work);
 
 static void dm_init_md_queue(struct mapped_device *md)
 {
@@ -1972,7 +1882,6 @@ static struct mapped_device *alloc_dev(int minor)
 	mutex_init(&md->suspend_lock);
 	mutex_init(&md->type_lock);
 	spin_lock_init(&md->deferred_lock);
-	spin_lock_init(&md->barrier_error_lock);
 	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
@@ -1995,7 +1904,6 @@ static struct mapped_device *alloc_dev(int minor)
 	atomic_set(&md->pending[1], 0);
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
-	INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
 	init_waitqueue_head(&md->eventq);
 
 	md->disk->major = _major;
@@ -2245,8 +2153,6 @@ static int dm_init_request_based_queue(struct mapped_device *md)
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 	blk_queue_lld_busy(md->queue, dm_lld_busy);
-	/* no flush support for request based dm yet */
-	blk_queue_flush(md->queue, 0);
 
 	elv_register_queue(md->queue);
 
@@ -2483,73 +2389,6 @@ static void dm_queue_flush(struct mapped_device *md)
 	queue_work(md->wq, &md->work);
 }
 
-static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
-{
-	struct dm_rq_target_io *tio = clone->end_io_data;
-
-	tio->info.target_request_nr = request_nr;
-}
-
-/* Issue barrier requests to targets and wait for their completion. */
-static int dm_rq_barrier(struct mapped_device *md)
-{
-	int i, j;
-	struct dm_table *map = dm_get_live_table(md);
-	unsigned num_targets = dm_table_get_num_targets(map);
-	struct dm_target *ti;
-	struct request *clone;
-
-	md->barrier_error = 0;
-
-	for (i = 0; i < num_targets; i++) {
-		ti = dm_table_get_target(map, i);
-		for (j = 0; j < ti->num_flush_requests; j++) {
-			clone = clone_rq(md->flush_request, md, GFP_NOIO);
-			dm_rq_set_target_request_nr(clone, j);
-			atomic_inc(&md->pending[rq_data_dir(clone)]);
-			map_request(ti, clone, md);
-		}
-	}
-
-	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-	dm_table_put(map);
-
-	return md->barrier_error;
-}
-
-static void dm_rq_barrier_work(struct work_struct *work)
-{
-	int error;
-	struct mapped_device *md = container_of(work, struct mapped_device,
-						barrier_work);
-	struct request_queue *q = md->queue;
-	struct request *rq;
-	unsigned long flags;
-
-	/*
-	 * Hold the md reference here and leave it at the last part so that
-	 * the md can't be deleted by device opener when the barrier request
-	 * completes.
-	 */
-	dm_get(md);
-
-	error = dm_rq_barrier(md);
-
-	rq = md->flush_request;
-	md->flush_request = NULL;
-
-	if (error == DM_ENDIO_REQUEUE) {
-		spin_lock_irqsave(q->queue_lock, flags);
-		blk_requeue_request(q, rq);
-		spin_unlock_irqrestore(q->queue_lock, flags);
-	} else
-		blk_end_request_all(rq, error);
-
-	blk_run_queue(q);
-
-	dm_put(md);
-}
-
 /*
  * Swap in a new table, returning the old one for the caller to destroy.
  */
@@ -2686,9 +2525,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	up_write(&md->io_lock);
 
 	/*
-	 * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
-	 * can be kicked until md->queue is stopped. So stop md->queue before
-	 * flushing md->wq.
+	 * Stop md->queue before flushing md->wq in case request-based
+	 * dm defers requests to md->wq from md->queue.
 	 */
 	if (dm_request_based(md))
 		stop_queue(md->queue);