author	Tejun Heo <tj@kernel.org>	2012-04-19 19:29:22 -0400
committer	Jens Axboe <axboe@kernel.dk>	2012-04-20 04:06:40 -0400
commit	aaf7c680682f1999ef2e574f743c45d1674a8b8a
tree	82d597ddfa7456553262435174948d9fb03fffef /block
parent	29e2b09ab5fa790514d47838f3c05497130908b3
block: fix elvpriv allocation failure handling
Request allocation is mempool backed to guarantee forward progress
under memory pressure; unfortunately, this property got broken while
adding elvpriv data. Failures during elvpriv allocation, including
ioc and icq creation failures, currently make get_request() fail as a
whole. There is no forward-progress guarantee for these allocations -
they may fail indefinitely under memory pressure, stalling IO and
deadlocking the system.
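
For background, the forward-progress property comes from the mempool
pattern: a pool pre-allocates a fixed reserve of objects, and a sleeping
mempool_alloc() waits for a reserved object to be freed back instead of
failing outright. Below is a minimal sketch of that pattern; the
example_* names are hypothetical, not the real request-pool setup.

#include <linux/blkdev.h>
#include <linux/mempool.h>
#include <linux/slab.h>

/* Hypothetical cache and pool, for illustration only. */
static struct kmem_cache *example_rq_cache;
static mempool_t *example_rq_pool;

static int example_pool_setup(void)
{
	example_rq_cache = kmem_cache_create("example_rq",
					     sizeof(struct request), 0, 0, NULL);
	if (!example_rq_cache)
		return -ENOMEM;

	/*
	 * Keep BLKDEV_MIN_RQ elements in reserve.  With a sleeping
	 * gfp_mask, mempool_alloc() does not fail permanently: when the
	 * slab allocator is exhausted it waits for an element to come
	 * back via mempool_free(), so allocation always makes forward
	 * progress under memory pressure.
	 */
	example_rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab,
					 mempool_free_slab, example_rq_cache);
	if (!example_rq_pool) {
		kmem_cache_destroy(example_rq_cache);
		return -ENOMEM;
	}
	return 0;
}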
This patch updates get_request() such that elvpriv allocation failure
doesn't make the whole function fail. If elvpriv allocation fails,
the allocation is degraded into !ELVPRIV. This forces the request to
ELEVATOR_INSERT_BACK, disturbing scheduling, but elvpriv allocation
failures should be rare (the ioc and icq are shared state, not
allocated anew for every request) and anything is better than
deadlocking.
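
The ELEVATOR_INSERT_BACK demotion is not added by this patch; elevator.c
already routes !ELVPRIV requests around the io scheduler's sort/merge
logic. Roughly, paraphrasing the check in __elv_add_request() of this
era (shown only to illustrate what dropping REQ_ELVPRIV costs):

	/*
	 * A request without per-request elevator data cannot be sorted
	 * or merged by the io scheduler, so a sort-style insertion is
	 * demoted to a plain tail insertion on the dispatch queue.
	 */
	if (!(rq->cmd_flags & REQ_ELVPRIV) &&
	    (where == ELEVATOR_INSERT_SORT ||
	     where == ELEVATOR_INSERT_SORT_MERGE))
		where = ELEVATOR_INSERT_BACK;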
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')

 block/blk-core.c | 53 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 17 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index f6f68b0c8302..6cf13df43c80 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -29,6 +29,7 @@
 #include <linux/fault-inject.h>
 #include <linux/list_sort.h>
 #include <linux/delay.h>
+#include <linux/ratelimit.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -930,17 +931,6 @@ retry:
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	/* create icq if missing */
-	if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
-		create_io_context(gfp_mask, q->node);
-		ioc = rq_ioc(bio);
-		if (!ioc)
-			goto fail_alloc;
-		icq = ioc_create_icq(ioc, q, gfp_mask);
-		if (!icq)
-			goto fail_alloc;
-	}
-
 	/* allocate and init request */
 	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 	if (!rq)
@@ -949,17 +939,28 @@ retry:
 	blk_rq_init(q, rq);
 	rq->cmd_flags = rw_flags | REQ_ALLOCED;
 
+	/* init elvpriv */
 	if (rw_flags & REQ_ELVPRIV) {
-		rq->elv.icq = icq;
-		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
-			mempool_free(rq, q->rq.rq_pool);
-			goto fail_alloc;
+		if (unlikely(et->icq_cache && !icq)) {
+			create_io_context(gfp_mask, q->node);
+			ioc = rq_ioc(bio);
+			if (!ioc)
+				goto fail_elvpriv;
+
+			icq = ioc_create_icq(ioc, q, gfp_mask);
+			if (!icq)
+				goto fail_elvpriv;
 		}
-		/* @rq->elv.icq holds on to io_context until @rq is freed */
+
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
+			goto fail_elvpriv;
+
+		/* @rq->elv.icq holds io_context until @rq is freed */
 		if (icq)
 			get_io_context(icq->ioc);
 	}
-
+out:
 	/*
 	 * ioc may be NULL here, and ioc_batching will be false. That's
 	 * OK, if the queue is under the request limit then requests need
@@ -972,6 +973,24 @@ retry:
 	trace_block_getrq(q, bio, rw_flags & 1);
 	return rq;
 
+fail_elvpriv:
+	/*
+	 * elvpriv init failed.  ioc, icq and elvpriv aren't mempool backed
+	 * and may fail indefinitely under memory pressure and thus
+	 * shouldn't stall IO.  Treat this request as !elvpriv.  This will
+	 * disturb iosched and blkcg but weird is better than dead.
+	 */
+	printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n",
+			   dev_name(q->backing_dev_info.dev));
+
+	rq->cmd_flags &= ~REQ_ELVPRIV;
+	rq->elv.icq = NULL;
+
+	spin_lock_irq(q->queue_lock);
+	rl->elvpriv--;
+	spin_unlock_irq(q->queue_lock);
+	goto out;
+
 fail_alloc:
 	/*
 	 * Allocation failed presumably due to memory. Undo anything we
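
A note on the warning in the fail_elvpriv path above: printk_ratelimited()
(hence the new <linux/ratelimit.h> include in the first hunk) guards the
printk() with a per-callsite ratelimit_state, so sustained memory pressure
yields a few warnings rather than a log flood. A self-contained sketch of
the same pattern; the function name and message are illustrative only:

#include <linux/kernel.h>
#include <linux/ratelimit.h>

static void example_report_degraded(const char *devname)
{
	/*
	 * printk_ratelimited() declares a static ratelimit_state at the
	 * call site (by default at most 10 messages per 5 seconds) and
	 * silently drops output beyond that, so a burst of failures
	 * emits a handful of lines instead of thousands.
	 */
	printk_ratelimited(KERN_WARNING
			   "%s: request aux data allocation failed\n",
			   devname);
}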