dm: implement REQ_FLUSH/FUA support for request-based dm

This patch converts request-based dm to support the new REQ_FLUSH/FUA. The original request-based flush implementation depended on request_queue blocking other requests while a barrier sequence is in progress, which is no longer true for the new REQ_FLUSH/FUA. In general, request-based dm doesn't have infrastructure for cloning one source request to multiple targets, but the original flush implementation had a special, mostly independent path which can issue flushes to multiple targets and sequence them. However, the capability isn't currently in use and adds a lot of complexity. Moreover, it's unlikely to be useful in its current form as it doesn't make sense to be able to send out flushes to multiple targets when write requests can't be. This patch rips out the special flush code path and handles REQ_FLUSH/FUA requests the same way as other requests. The only special treatment is that REQ_FLUSH requests use the block address 0 when finding the target, which is enough for now. * added BUG_ON(!dm_target_is_valid(ti)) in dm_request_fn() as suggested by Mike Snitzer Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Mike Snitzer <snitzer@redhat.com> Tested-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
author: Tejun Heo <tj@kernel.org> 2010-09-08 12:07:00 -0400
committer: Jens Axboe <jaxboe@fusionio.com> 2010-09-10 06:35:38 -0400
commit: 29e4013de7ad950280e4b220894986866697d419 (patch)
tree: 302e99d146940d043696f3e53b3814e65f99b269
parent: d87f4c14f27dc82d215108d8392a7d26687148a1 (diff)
1 files changed, 22 insertions, 184 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 32e6622767ad..65114e4d9f65 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -149,20 +149,9 @@ struct mapped_device {
        int flush_error;
        /*
-         * Protect barrier_error from concurrent endio processing
+         * Processing queue (flush)
-         * in request-based dm.
-         */
-        spinlock_t barrier_error_lock;
-        int barrier_error;
-        /*
-         * Processing queue (flush/barriers)
         */
        struct workqueue_struct *wq;
-        struct work_struct barrier_work;
-        /* A pointer to the currently processing pre/post flush request */
-        struct request *flush_request;
        /*
         * The current mapping.
@@ -750,23 +739,6 @@ static void end_clone_bio(struct bio *clone, int error)
        blk_update_request(tio->orig, 0, nr_bytes);
 }
-static void store_barrier_error(struct mapped_device *md, int error)
-{
-        unsigned long flags;
-        spin_lock_irqsave(&md->barrier_error_lock, flags);
-        /*
-         * Basically, the first error is taken, but:
-         *   -EOPNOTSUPP supersedes any I/O error.
-         *   Requeue request supersedes any I/O error but -EOPNOTSUPP.
-         */
-        if (!md->barrier_error || error == -EOPNOTSUPP ||
-            (md->barrier_error != -EOPNOTSUPP &&
-             error == DM_ENDIO_REQUEUE))
-                md->barrier_error = error;
-        spin_unlock_irqrestore(&md->barrier_error_lock, flags);
-}
 /*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
@@ -804,13 +776,11 @@ static void free_rq_clone(struct request *clone)
 static void dm_end_request(struct request *clone, int error)
 {
        int rw = rq_data_dir(clone);
-        int run_queue = 1;
-        bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
-        if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
+        if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                rq->errors = clone->errors;
                rq->resid_len = clone->resid_len;
@@ -824,15 +794,8 @@ static void dm_end_request(struct request *clone, int error)
        }
        free_rq_clone(clone);
+        blk_end_request_all(rq, error);
-        if (unlikely(is_barrier)) {
+        rq_completed(md, rw, true);
-                if (unlikely(error))
-                        store_barrier_error(md, error);
-                run_queue = 0;
-        } else
-                blk_end_request_all(rq, error);
-        rq_completed(md, rw, run_queue);
 }
 static void dm_unprep_request(struct request *rq)
@@ -857,16 +820,6 @@ void dm_requeue_unmapped_request(struct request *clone)
        struct request_queue *q = rq->q;
        unsigned long flags;
-        if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-                /*
-                 * Barrier clones share an original request.
-                 * Leave it to dm_end_request(), which handles this special
-                 * case.
-                 */
-                dm_end_request(clone, DM_ENDIO_REQUEUE);
-                return;
-        }
        dm_unprep_request(rq);
        spin_lock_irqsave(q->queue_lock, flags);
@@ -956,19 +909,6 @@ static void dm_complete_request(struct request *clone, int error)
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct request *rq = tio->orig;
-        if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-                /*
-                 * Barrier clones share an original request.  So can't use
-                 * softirq_done with the original.
-                 * Pass the clone to dm_done() directly in this special case.
-                 * It is safe (even if clone->q->queue_lock is held here)
-                 * because there is no I/O dispatching during the completion
-                 * of barrier clone.
-                 */
-                dm_done(clone, error, true);
-                return;
-        }
        tio->error = error;
        rq->completion_data = clone;
        blk_complete_request(rq);
@@ -985,17 +925,6 @@ void dm_kill_unmapped_request(struct request *clone, int error)
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct request *rq = tio->orig;
-        if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
-                /*
-                 * Barrier clones share an original request.
-                 * Leave it to dm_end_request(), which handles this special
-                 * case.
-                 */
-                BUG_ON(error > 0);
-                dm_end_request(clone, error);
-                return;
-        }
        rq->cmd_flags |= REQ_FAILED;
        dm_complete_request(clone, error);
 }
@@ -1536,14 +1465,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
        return _dm_request(q, bio);
 }
-static bool dm_rq_is_flush_request(struct request *rq)
-{
-        if (rq->cmd_flags & REQ_FLUSH)
-                return true;
-        else
-                return false;
-}
 void dm_dispatch_request(struct request *rq)
 {
        int r;
@@ -1591,22 +1512,15 @@ static int setup_clone(struct request *clone, struct request *rq,
 {
        int r;
-        if (dm_rq_is_flush_request(rq)) {
+        r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
-                blk_rq_init(NULL, clone);
+                              dm_rq_bio_constructor, tio);
-                clone->cmd_type = REQ_TYPE_FS;
+        if (r)
-                clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
+                return r;
-        } else {
-                r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
-                                      dm_rq_bio_constructor, tio);
-                if (r)
-                        return r;
-                clone->cmd = rq->cmd;
-                clone->cmd_len = rq->cmd_len;
-                clone->sense = rq->sense;
-                clone->buffer = rq->buffer;
-        }
+        clone->cmd = rq->cmd;
+        clone->cmd_len = rq->cmd_len;
+        clone->sense = rq->sense;
+        clone->buffer = rq->buffer;
        clone->end_io = end_clone_request;
        clone->end_io_data = tio;
@@ -1647,9 +1561,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
        struct mapped_device *md = q->queuedata;
        struct request *clone;
-        if (unlikely(dm_rq_is_flush_request(rq)))
-                return BLKPREP_OK;
        if (unlikely(rq->special)) {
                DMWARN("Already has something in rq->special.");
                return BLKPREP_KILL;
@@ -1726,6 +1637,7 @@ static void dm_request_fn(struct request_queue *q)
        struct dm_table *map = dm_get_live_table(md);
        struct dm_target *ti;
        struct request *rq, *clone;
+        sector_t pos;
        /*
         * For suspend, check blk_queue_stopped() and increment
@@ -1738,15 +1650,14 @@ static void dm_request_fn(struct request_queue *q)
                if (!rq)
                        goto plug_and_out;
-                if (unlikely(dm_rq_is_flush_request(rq))) {
+                /* always use block 0 to find the target for flushes for now */
-                        BUG_ON(md->flush_request);
+                pos = 0;
-                        md->flush_request = rq;
+                if (!(rq->cmd_flags & REQ_FLUSH))
-                        blk_start_request(rq);
+                        pos = blk_rq_pos(rq);
-                        queue_work(md->wq, &md->barrier_work);
-                        goto out;
+                ti = dm_table_find_target(map, pos);
-                }
+                BUG_ON(!dm_target_is_valid(ti));
-                ti = dm_table_find_target(map, blk_rq_pos(rq));
                if (ti->type->busy && ti->type->busy(ti))
                        goto plug_and_out;
@@ -1917,7 +1828,6 @@ out:
 static const struct block_device_operations dm_blk_dops;
 static void dm_wq_work(struct work_struct *work);
-static void dm_rq_barrier_work(struct work_struct *work);
 static void dm_init_md_queue(struct mapped_device *md)
 {
@@ -1972,7 +1882,6 @@ static struct mapped_device *alloc_dev(int minor)
        mutex_init(&md->suspend_lock);
        mutex_init(&md->type_lock);
        spin_lock_init(&md->deferred_lock);
-        spin_lock_init(&md->barrier_error_lock);
        rwlock_init(&md->map_lock);
        atomic_set(&md->holders, 1);
        atomic_set(&md->open_count, 0);
@@ -1995,7 +1904,6 @@ static struct mapped_device *alloc_dev(int minor)
        atomic_set(&md->pending[1], 0);
        init_waitqueue_head(&md->wait);
        INIT_WORK(&md->work, dm_wq_work);
-        INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
        init_waitqueue_head(&md->eventq);
        md->disk->major = _major;
@@ -2245,8 +2153,6 @@ static int dm_init_request_based_queue(struct mapped_device *md)
        blk_queue_softirq_done(md->queue, dm_softirq_done);
        blk_queue_prep_rq(md->queue, dm_prep_fn);
        blk_queue_lld_busy(md->queue, dm_lld_busy);
-        /* no flush support for request based dm yet */
-        blk_queue_flush(md->queue, 0);
        elv_register_queue(md->queue);
@@ -2483,73 +2389,6 @@ static void dm_queue_flush(struct mapped_device *md)
        queue_work(md->wq, &md->work);
 }
-static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
-{
-        struct dm_rq_target_io *tio = clone->end_io_data;
-        tio->info.target_request_nr = request_nr;
-}
-/* Issue barrier requests to targets and wait for their completion. */
-static int dm_rq_barrier(struct mapped_device *md)
-{
-        int i, j;
-        struct dm_table *map = dm_get_live_table(md);
-        unsigned num_targets = dm_table_get_num_targets(map);
-        struct dm_target *ti;
-        struct request *clone;
-        md->barrier_error = 0;
-        for (i = 0; i < num_targets; i++) {
-                ti = dm_table_get_target(map, i);
-                for (j = 0; j < ti->num_flush_requests; j++) {
-                        clone = clone_rq(md->flush_request, md, GFP_NOIO);
-                        dm_rq_set_target_request_nr(clone, j);
-                        atomic_inc(&md->pending[rq_data_dir(clone)]);
-                        map_request(ti, clone, md);
-                }
-        }
-        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-        dm_table_put(map);
-        return md->barrier_error;
-}
-static void dm_rq_barrier_work(struct work_struct *work)
-{
-        int error;
-        struct mapped_device *md = container_of(work, struct mapped_device,
-                                                barrier_work);
-        struct request_queue *q = md->queue;
-        struct request *rq;
-        unsigned long flags;
-        /*
-         * Hold the md reference here and leave it at the last part so that
-         * the md can't be deleted by device opener when the barrier request
-         * completes.
-         */
-        dm_get(md);
-        error = dm_rq_barrier(md);
-        rq = md->flush_request;
-        md->flush_request = NULL;
-        if (error == DM_ENDIO_REQUEUE) {
-                spin_lock_irqsave(q->queue_lock, flags);
-                blk_requeue_request(q, rq);
-                spin_unlock_irqrestore(q->queue_lock, flags);
-        } else
-                blk_end_request_all(rq, error);
-        blk_run_queue(q);
-        dm_put(md);
-}
 /*
 * Swap in a new table, returning the old one for the caller to destroy.
 */
@@ -2686,9 +2525,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
        up_write(&md->io_lock);
        /*
-         * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
+         * Stop md->queue before flushing md->wq in case request-based
-         * can be kicked until md->queue is stopped.  So stop md->queue before
+         * dm defers requests to md->wq from md->queue.
-         * flushing md->wq.
         */
        if (dm_request_based(md))
                stop_queue(md->queue);
author	Tejun Heo <tj@kernel.org>	2010-09-08 12:07:00 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-09-10 06:35:38 -0400
commit	29e4013de7ad950280e4b220894986866697d419 (patch)
tree	302e99d146940d043696f3e53b3814e65f99b269
parent	d87f4c14f27dc82d215108d8392a7d26687148a1 (diff)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 32e6622767ad..65114e4d9f65 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c
@@ -149,20 +149,9 @@ struct mapped_device {
149	int flush_error;	149	int flush_error;
150		150
151	/*	151	/*
152	* Protect barrier_error from concurrent endio processing	152	* Processing queue (flush)
153	* in request-based dm.
154	*/
155	spinlock_t barrier_error_lock;
156	int barrier_error;
157
158	/*
159	* Processing queue (flush/barriers)
160	*/	153	*/
161	struct workqueue_struct *wq;	154	struct workqueue_struct *wq;
162	struct work_struct barrier_work;
163
164	/* A pointer to the currently processing pre/post flush request */
165	struct request *flush_request;
166		155
167	/*	156	/*
168	* The current mapping.	157	* The current mapping.
@@ -750,23 +739,6 @@ static void end_clone_bio(struct bio *clone, int error)
750	blk_update_request(tio->orig, 0, nr_bytes);	739	blk_update_request(tio->orig, 0, nr_bytes);
751	}	740	}
752		741
753	static void store_barrier_error(struct mapped_device *md, int error)
754	{
755	unsigned long flags;
756
757	spin_lock_irqsave(&md->barrier_error_lock, flags);
758	/*
759	* Basically, the first error is taken, but:
760	* -EOPNOTSUPP supersedes any I/O error.
761	* Requeue request supersedes any I/O error but -EOPNOTSUPP.
762	*/
763	if (!md->barrier_error || error == -EOPNOTSUPP ||
764	(md->barrier_error != -EOPNOTSUPP &&
765	error == DM_ENDIO_REQUEUE))
766	md->barrier_error = error;
767	spin_unlock_irqrestore(&md->barrier_error_lock, flags);
768	}
769
770	/*	742	/*
771	* Don't touch any member of the md after calling this function because	743	* Don't touch any member of the md after calling this function because
772	* the md may be freed in dm_put() at the end of this function.	744	* the md may be freed in dm_put() at the end of this function.
@@ -804,13 +776,11 @@ static void free_rq_clone(struct request *clone)
804	static void dm_end_request(struct request *clone, int error)	776	static void dm_end_request(struct request *clone, int error)
805	{	777	{
806	int rw = rq_data_dir(clone);	778	int rw = rq_data_dir(clone);
807	int run_queue = 1;
808	bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
809	struct dm_rq_target_io *tio = clone->end_io_data;	779	struct dm_rq_target_io *tio = clone->end_io_data;
810	struct mapped_device *md = tio->md;	780	struct mapped_device *md = tio->md;
811	struct request *rq = tio->orig;	781	struct request *rq = tio->orig;
812		782
813	if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {	783	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
814	rq->errors = clone->errors;	784	rq->errors = clone->errors;
815	rq->resid_len = clone->resid_len;	785	rq->resid_len = clone->resid_len;
816		786
@@ -824,15 +794,8 @@ static void dm_end_request(struct request *clone, int error)
824	}	794	}
825		795
826	free_rq_clone(clone);	796	free_rq_clone(clone);
827		797	blk_end_request_all(rq, error);
828	if (unlikely(is_barrier)) {	798	rq_completed(md, rw, true);
829	if (unlikely(error))
830	store_barrier_error(md, error);
831	run_queue = 0;
832	} else
833	blk_end_request_all(rq, error);
834
835	rq_completed(md, rw, run_queue);
836	}	799	}
837		800
838	static void dm_unprep_request(struct request *rq)	801	static void dm_unprep_request(struct request *rq)
@@ -857,16 +820,6 @@ void dm_requeue_unmapped_request(struct request *clone)
857	struct request_queue *q = rq->q;	820	struct request_queue *q = rq->q;
858	unsigned long flags;	821	unsigned long flags;
859		822
860	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
861	/*
862	* Barrier clones share an original request.
863	* Leave it to dm_end_request(), which handles this special
864	* case.
865	*/
866	dm_end_request(clone, DM_ENDIO_REQUEUE);
867	return;
868	}
869
870	dm_unprep_request(rq);	823	dm_unprep_request(rq);
871		824
872	spin_lock_irqsave(q->queue_lock, flags);	825	spin_lock_irqsave(q->queue_lock, flags);
@@ -956,19 +909,6 @@ static void dm_complete_request(struct request *clone, int error)
956	struct dm_rq_target_io *tio = clone->end_io_data;	909	struct dm_rq_target_io *tio = clone->end_io_data;
957	struct request *rq = tio->orig;	910	struct request *rq = tio->orig;
958		911
959	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
960	/*
961	* Barrier clones share an original request. So can't use
962	* softirq_done with the original.
963	* Pass the clone to dm_done() directly in this special case.
964	* It is safe (even if clone->q->queue_lock is held here)
965	* because there is no I/O dispatching during the completion
966	* of barrier clone.
967	*/
968	dm_done(clone, error, true);
969	return;
970	}
971
972	tio->error = error;	912	tio->error = error;
973	rq->completion_data = clone;	913	rq->completion_data = clone;
974	blk_complete_request(rq);	914	blk_complete_request(rq);
@@ -985,17 +925,6 @@ void dm_kill_unmapped_request(struct request *clone, int error)
985	struct dm_rq_target_io *tio = clone->end_io_data;	925	struct dm_rq_target_io *tio = clone->end_io_data;
986	struct request *rq = tio->orig;	926	struct request *rq = tio->orig;
987		927
988	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
989	/*
990	* Barrier clones share an original request.
991	* Leave it to dm_end_request(), which handles this special
992	* case.
993	*/
994	BUG_ON(error > 0);
995	dm_end_request(clone, error);
996	return;
997	}
998
999	rq->cmd_flags |= REQ_FAILED;	928	rq->cmd_flags |= REQ_FAILED;
1000	dm_complete_request(clone, error);	929	dm_complete_request(clone, error);
1001	}	930	}
@@ -1536,14 +1465,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
1536	return _dm_request(q, bio);	1465	return _dm_request(q, bio);
1537	}	1466	}
1538		1467
1539	static bool dm_rq_is_flush_request(struct request *rq)
1540	{
1541	if (rq->cmd_flags & REQ_FLUSH)
1542	return true;
1543	else
1544	return false;
1545	}
1546
1547	void dm_dispatch_request(struct request *rq)	1468	void dm_dispatch_request(struct request *rq)
1548	{	1469	{
1549	int r;	1470	int r;
@@ -1591,22 +1512,15 @@ static int setup_clone(struct request *clone, struct request *rq,
1591	{	1512	{
1592	int r;	1513	int r;
1593		1514
1594	if (dm_rq_is_flush_request(rq)) {	1515	r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1595	blk_rq_init(NULL, clone);	1516	dm_rq_bio_constructor, tio);
1596	clone->cmd_type = REQ_TYPE_FS;	1517	if (r)
1597	clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);	1518	return r;
1598	} else {
1599	r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1600	dm_rq_bio_constructor, tio);
1601	if (r)
1602	return r;
1603
1604	clone->cmd = rq->cmd;
1605	clone->cmd_len = rq->cmd_len;
1606	clone->sense = rq->sense;
1607	clone->buffer = rq->buffer;
1608	}
1609		1519
		1520	clone->cmd = rq->cmd;
		1521	clone->cmd_len = rq->cmd_len;
		1522	clone->sense = rq->sense;
		1523	clone->buffer = rq->buffer;
1610	clone->end_io = end_clone_request;	1524	clone->end_io = end_clone_request;
1611	clone->end_io_data = tio;	1525	clone->end_io_data = tio;
1612		1526
@@ -1647,9 +1561,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
1647	struct mapped_device *md = q->queuedata;	1561	struct mapped_device *md = q->queuedata;
1648	struct request *clone;	1562	struct request *clone;
1649		1563
1650	if (unlikely(dm_rq_is_flush_request(rq)))
1651	return BLKPREP_OK;
1652
1653	if (unlikely(rq->special)) {	1564	if (unlikely(rq->special)) {
1654	DMWARN("Already has something in rq->special.");	1565	DMWARN("Already has something in rq->special.");
1655	return BLKPREP_KILL;	1566	return BLKPREP_KILL;
@@ -1726,6 +1637,7 @@ static void dm_request_fn(struct request_queue *q)
1726	struct dm_table *map = dm_get_live_table(md);	1637	struct dm_table *map = dm_get_live_table(md);
1727	struct dm_target *ti;	1638	struct dm_target *ti;
1728	struct request *rq, *clone;	1639	struct request *rq, *clone;
		1640	sector_t pos;
1729		1641
1730	/*	1642	/*
1731	* For suspend, check blk_queue_stopped() and increment	1643	* For suspend, check blk_queue_stopped() and increment
@@ -1738,15 +1650,14 @@ static void dm_request_fn(struct request_queue *q)
1738	if (!rq)	1650	if (!rq)
1739	goto plug_and_out;	1651	goto plug_and_out;
1740		1652
1741	if (unlikely(dm_rq_is_flush_request(rq))) {	1653	/* always use block 0 to find the target for flushes for now */
1742	BUG_ON(md->flush_request);	1654	pos = 0;
1743	md->flush_request = rq;	1655	if (!(rq->cmd_flags & REQ_FLUSH))
1744	blk_start_request(rq);	1656	pos = blk_rq_pos(rq);
1745	queue_work(md->wq, &md->barrier_work);	1657
1746	goto out;	1658	ti = dm_table_find_target(map, pos);
1747	}	1659	BUG_ON(!dm_target_is_valid(ti));
1748		1660
1749	ti = dm_table_find_target(map, blk_rq_pos(rq));
1750	if (ti->type->busy && ti->type->busy(ti))	1661	if (ti->type->busy && ti->type->busy(ti))
1751	goto plug_and_out;	1662	goto plug_and_out;
1752		1663
@@ -1917,7 +1828,6 @@ out:
1917	static const struct block_device_operations dm_blk_dops;	1828	static const struct block_device_operations dm_blk_dops;
1918		1829
1919	static void dm_wq_work(struct work_struct *work);	1830	static void dm_wq_work(struct work_struct *work);
1920	static void dm_rq_barrier_work(struct work_struct *work);
1921		1831
1922	static void dm_init_md_queue(struct mapped_device *md)	1832	static void dm_init_md_queue(struct mapped_device *md)
1923	{	1833	{
@@ -1972,7 +1882,6 @@ static struct mapped_device *alloc_dev(int minor)
1972	mutex_init(&md->suspend_lock);	1882	mutex_init(&md->suspend_lock);
1973	mutex_init(&md->type_lock);	1883	mutex_init(&md->type_lock);
1974	spin_lock_init(&md->deferred_lock);	1884	spin_lock_init(&md->deferred_lock);
1975	spin_lock_init(&md->barrier_error_lock);
1976	rwlock_init(&md->map_lock);	1885	rwlock_init(&md->map_lock);
1977	atomic_set(&md->holders, 1);	1886	atomic_set(&md->holders, 1);
1978	atomic_set(&md->open_count, 0);	1887	atomic_set(&md->open_count, 0);
@@ -1995,7 +1904,6 @@ static struct mapped_device *alloc_dev(int minor)
1995	atomic_set(&md->pending[1], 0);	1904	atomic_set(&md->pending[1], 0);
1996	init_waitqueue_head(&md->wait);	1905	init_waitqueue_head(&md->wait);
1997	INIT_WORK(&md->work, dm_wq_work);	1906	INIT_WORK(&md->work, dm_wq_work);
1998	INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
1999	init_waitqueue_head(&md->eventq);	1907	init_waitqueue_head(&md->eventq);
2000		1908
2001	md->disk->major = _major;	1909	md->disk->major = _major;
@@ -2245,8 +2153,6 @@ static int dm_init_request_based_queue(struct mapped_device *md)
2245	blk_queue_softirq_done(md->queue, dm_softirq_done);	2153	blk_queue_softirq_done(md->queue, dm_softirq_done);
2246	blk_queue_prep_rq(md->queue, dm_prep_fn);	2154	blk_queue_prep_rq(md->queue, dm_prep_fn);
2247	blk_queue_lld_busy(md->queue, dm_lld_busy);	2155	blk_queue_lld_busy(md->queue, dm_lld_busy);
2248	/* no flush support for request based dm yet */
2249	blk_queue_flush(md->queue, 0);
2250		2156
2251	elv_register_queue(md->queue);	2157	elv_register_queue(md->queue);
2252		2158
@@ -2483,73 +2389,6 @@ static void dm_queue_flush(struct mapped_device *md)
2483	queue_work(md->wq, &md->work);	2389	queue_work(md->wq, &md->work);
2484	}	2390	}
2485		2391
2486	static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
2487	{
2488	struct dm_rq_target_io *tio = clone->end_io_data;
2489
2490	tio->info.target_request_nr = request_nr;
2491	}
2492
2493	/* Issue barrier requests to targets and wait for their completion. */
2494	static int dm_rq_barrier(struct mapped_device *md)
2495	{
2496	int i, j;
2497	struct dm_table *map = dm_get_live_table(md);
2498	unsigned num_targets = dm_table_get_num_targets(map);
2499	struct dm_target *ti;
2500	struct request *clone;
2501
2502	md->barrier_error = 0;
2503
2504	for (i = 0; i < num_targets; i++) {
2505	ti = dm_table_get_target(map, i);
2506	for (j = 0; j < ti->num_flush_requests; j++) {
2507	clone = clone_rq(md->flush_request, md, GFP_NOIO);
2508	dm_rq_set_target_request_nr(clone, j);
2509	atomic_inc(&md->pending[rq_data_dir(clone)]);
2510	map_request(ti, clone, md);
2511	}
2512	}
2513
2514	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2515	dm_table_put(map);
2516
2517	return md->barrier_error;
2518	}
2519
2520	static void dm_rq_barrier_work(struct work_struct *work)
2521	{
2522	int error;
2523	struct mapped_device *md = container_of(work, struct mapped_device,
2524	barrier_work);
2525	struct request_queue *q = md->queue;
2526	struct request *rq;
2527	unsigned long flags;
2528
2529	/*
2530	* Hold the md reference here and leave it at the last part so that
2531	* the md can't be deleted by device opener when the barrier request
2532	* completes.
2533	*/
2534	dm_get(md);
2535
2536	error = dm_rq_barrier(md);
2537
2538	rq = md->flush_request;
2539	md->flush_request = NULL;
2540
2541	if (error == DM_ENDIO_REQUEUE) {
2542	spin_lock_irqsave(q->queue_lock, flags);
2543	blk_requeue_request(q, rq);
2544	spin_unlock_irqrestore(q->queue_lock, flags);
2545	} else
2546	blk_end_request_all(rq, error);
2547
2548	blk_run_queue(q);
2549
2550	dm_put(md);
2551	}
2552
2553	/*	2392	/*
2554	* Swap in a new table, returning the old one for the caller to destroy.	2393	* Swap in a new table, returning the old one for the caller to destroy.
2555	*/	2394	*/
@@ -2686,9 +2525,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2686	up_write(&md->io_lock);	2525	up_write(&md->io_lock);
2687		2526
2688	/*	2527	/*
2689	* Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which	2528	* Stop md->queue before flushing md->wq in case request-based
2690	* can be kicked until md->queue is stopped. So stop md->queue before	2529	* dm defers requests to md->wq from md->queue.
2691	* flushing md->wq.
2692	*/	2530	*/
2693	if (dm_request_based(md))	2531	if (dm_request_based(md))
2694	stop_queue(md->queue);	2532	stop_queue(md->queue);