author	Tejun Heo <tj@kernel.org>	2012-04-19 19:29:22 -0400
committer	Jens Axboe <axboe@kernel.dk>	2012-04-20 04:06:40 -0400
commit	aaf7c680682f1999ef2e574f743c45d1674a8b8a
tree	82d597ddfa7456553262435174948d9fb03fffef /block
parent	29e2b09ab5fa790514d47838f3c05497130908b3
block: fix elvpriv allocation failure handling
Request allocation is mempool backed to guarantee forward progress
under memory pressure; unfortunately, this property got broken while
adding elvpriv data. Failures during elvpriv allocation, including
ioc and icq creation failures, currently make get_request() fail as a
whole. There is no forward-progress guarantee for these allocations -
they may fail indefinitely under memory pressure, stalling IO and
deadlocking the system.
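
For background, the forward-progress property comes from the mempool
pattern: a pool pre-allocates a fixed reserve of objects, and a sleeping
mempool_alloc() waits for a reserved object to be freed back instead of
failing outright. Below is a minimal sketch of that pattern; the
example_* names are hypothetical, not the real request-pool setup.

#include <linux/blkdev.h>
#include <linux/mempool.h>
#include <linux/slab.h>

/* Hypothetical cache and pool, for illustration only. */
static struct kmem_cache *example_rq_cache;
static mempool_t *example_rq_pool;

static int example_pool_setup(void)
{
	example_rq_cache = kmem_cache_create("example_rq",
					     sizeof(struct request), 0, 0, NULL);
	if (!example_rq_cache)
		return -ENOMEM;

	/*
	 * Keep BLKDEV_MIN_RQ elements in reserve.  With a sleeping
	 * gfp_mask, mempool_alloc() does not fail permanently: when the
	 * slab allocator is exhausted it waits for an element to come
	 * back via mempool_free(), so allocation always makes forward
	 * progress under memory pressure.
	 */
	example_rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab,
					 mempool_free_slab, example_rq_cache);
	if (!example_rq_pool) {
		kmem_cache_destroy(example_rq_cache);
		return -ENOMEM;
	}
	return 0;
}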
This patch updates get_request() such that elvpriv allocation failure
doesn't make the whole function fail. If elvpriv allocation fails,
the allocation is degraded into !ELVPRIV. This forces the request to
ELEVATOR_INSERT_BACK, disturbing scheduling, but elvpriv allocation
failures should be rare (the ioc and icq are shared state, not
allocated anew for every request) and anything is better than
deadlocking.
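
The ELEVATOR_INSERT_BACK demotion is not added by this patch; elevator.c
already routes !ELVPRIV requests around the io scheduler's sort/merge
logic. Roughly, paraphrasing the check in __elv_add_request() of this
era (shown only to illustrate what dropping REQ_ELVPRIV costs):

	/*
	 * A request without per-request elevator data cannot be sorted
	 * or merged by the io scheduler, so a sort-style insertion is
	 * demoted to a plain tail insertion on the dispatch queue.
	 */
	if (!(rq->cmd_flags & REQ_ELVPRIV) &&
	    (where == ELEVATOR_INSERT_SORT ||
	     where == ELEVATOR_INSERT_SORT_MERGE))
		where = ELEVATOR_INSERT_BACK;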
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')

 block/blk-core.c | 53 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 17 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index f6f68b0c8302..6cf13df43c80 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -29,6 +29,7 @@
 #include <linux/fault-inject.h>
 #include <linux/list_sort.h>
 #include <linux/delay.h>
+#include <linux/ratelimit.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -930,17 +931,6 @@ retry:
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	/* create icq if missing */
-	if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
-		create_io_context(gfp_mask, q->node);
-		ioc = rq_ioc(bio);
-		if (!ioc)
-			goto fail_alloc;
-		icq = ioc_create_icq(ioc, q, gfp_mask);
-		if (!icq)
-			goto fail_alloc;
-	}
-
 	/* allocate and init request */
 	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 	if (!rq)
@@ -949,17 +939,28 @@ retry:
 	blk_rq_init(q, rq);
 	rq->cmd_flags = rw_flags | REQ_ALLOCED;
 
+	/* init elvpriv */
 	if (rw_flags & REQ_ELVPRIV) {
-		rq->elv.icq = icq;
-		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
-			mempool_free(rq, q->rq.rq_pool);
-			goto fail_alloc;
+		if (unlikely(et->icq_cache && !icq)) {
+			create_io_context(gfp_mask, q->node);
+			ioc = rq_ioc(bio);
+			if (!ioc)
+				goto fail_elvpriv;
+
+			icq = ioc_create_icq(ioc, q, gfp_mask);
+			if (!icq)
+				goto fail_elvpriv;
 		}
-		/* @rq->elv.icq holds on to io_context until @rq is freed */
+
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
+			goto fail_elvpriv;
+
+		/* @rq->elv.icq holds io_context until @rq is freed */
 		if (icq)
 			get_io_context(icq->ioc);
 	}
-
+out:
 	/*
 	 * ioc may be NULL here, and ioc_batching will be false. That's
 	 * OK, if the queue is under the request limit then requests need
@@ -972,6 +973,24 @@ retry:
 	trace_block_getrq(q, bio, rw_flags & 1);
 	return rq;
 
+fail_elvpriv:
+	/*
+	 * elvpriv init failed.  ioc, icq and elvpriv aren't mempool backed
+	 * and may fail indefinitely under memory pressure and thus
+	 * shouldn't stall IO.  Treat this request as !elvpriv.  This will
+	 * disturb iosched and blkcg but weird is better than dead.
+	 */
+	printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n",
+			   dev_name(q->backing_dev_info.dev));
+
+	rq->cmd_flags &= ~REQ_ELVPRIV;
+	rq->elv.icq = NULL;
+
+	spin_lock_irq(q->queue_lock);
+	rl->elvpriv--;
+	spin_unlock_irq(q->queue_lock);
+	goto out;
+
 fail_alloc:
 	/*
 	 * Allocation failed presumably due to memory. Undo anything we
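
A note on the warning in the fail_elvpriv path above: printk_ratelimited()
(hence the new <linux/ratelimit.h> include in the first hunk) guards the
printk() with a per-callsite ratelimit_state, so sustained memory pressure
yields a few warnings rather than a log flood. A self-contained sketch of
the same pattern; the function name and message are illustrative only:

#include <linux/kernel.h>
#include <linux/ratelimit.h>

static void example_report_degraded(const char *devname)
{
	/*
	 * printk_ratelimited() declares a static ratelimit_state at the
	 * call site (by default at most 10 messages per 5 seconds) and
	 * silently drops output beyond that, so a burst of failures
	 * emits a handful of lines instead of thousands.
	 */
	printk_ratelimited(KERN_WARNING
			   "%s: request aux data allocation failed\n",
			   devname);
}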