| | | |
|---|---|---|
| author | Tejun Heo <tj@kernel.org> | 2012-04-19 19:29:22 -0400 |
| committer | Jens Axboe <axboe@kernel.dk> | 2012-04-20 04:06:40 -0400 |
| commit | aaf7c680682f1999ef2e574f743c45d1674a8b8a | |
| tree | 82d597ddfa7456553262435174948d9fb03fffef | |
| parent | 29e2b09ab5fa790514d47838f3c05497130908b3 | |
block: fix elvpriv allocation failure handling
Request allocation is mempool backed to guarantee forward progress
under memory pressure; unfortunately, this property got broken while
adding elvpriv data. Failures during elvpriv allocation, including ioc
and icq creation failures, currently make get_request() fail as a
whole. There is no forward-progress guarantee for these allocations -
they may fail indefinitely under memory pressure, stalling IO and
deadlocking the system.
This patch updates get_request() such that elvpriv allocation failure
doesn't make the whole function fail. If elvpriv allocation fails, the
allocation is degraded into !ELVPRIV. This forces the request to
ELEVATOR_INSERT_BACK, disturbing scheduling, but elvpriv allocation
failures should be rare (neither ioc nor icq is allocated per-request)
and anything is better than deadlocking.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | block/blk-core.c | 53 |

1 file changed, 36 insertions(+), 17 deletions(-)
```diff
diff --git a/block/blk-core.c b/block/blk-core.c
index f6f68b0c8302..6cf13df43c80 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -29,6 +29,7 @@
 #include <linux/fault-inject.h>
 #include <linux/list_sort.h>
 #include <linux/delay.h>
+#include <linux/ratelimit.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -930,17 +931,6 @@ retry:
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	/* create icq if missing */
-	if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
-		create_io_context(gfp_mask, q->node);
-		ioc = rq_ioc(bio);
-		if (!ioc)
-			goto fail_alloc;
-		icq = ioc_create_icq(ioc, q, gfp_mask);
-		if (!icq)
-			goto fail_alloc;
-	}
-
 	/* allocate and init request */
 	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 	if (!rq)
@@ -949,17 +939,28 @@ retry:
 	blk_rq_init(q, rq);
 	rq->cmd_flags = rw_flags | REQ_ALLOCED;
 
+	/* init elvpriv */
 	if (rw_flags & REQ_ELVPRIV) {
-		rq->elv.icq = icq;
-		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
-			mempool_free(rq, q->rq.rq_pool);
-			goto fail_alloc;
+		if (unlikely(et->icq_cache && !icq)) {
+			create_io_context(gfp_mask, q->node);
+			ioc = rq_ioc(bio);
+			if (!ioc)
+				goto fail_elvpriv;
+
+			icq = ioc_create_icq(ioc, q, gfp_mask);
+			if (!icq)
+				goto fail_elvpriv;
 		}
-		/* @rq->elv.icq holds on to io_context until @rq is freed */
+
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
+			goto fail_elvpriv;
+
+		/* @rq->elv.icq holds io_context until @rq is freed */
 		if (icq)
 			get_io_context(icq->ioc);
 	}
-
+out:
 	/*
 	 * ioc may be NULL here, and ioc_batching will be false. That's
 	 * OK, if the queue is under the request limit then requests need
@@ -972,6 +973,24 @@ retry:
 	trace_block_getrq(q, bio, rw_flags & 1);
 	return rq;
 
+fail_elvpriv:
+	/*
+	 * elvpriv init failed.  ioc, icq and elvpriv aren't mempool backed
+	 * and may fail indefinitely under memory pressure and thus
+	 * shouldn't stall IO.  Treat this request as !elvpriv.  This will
+	 * disturb iosched and blkcg but weird is bettern than dead.
+	 */
+	printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n",
+			   dev_name(q->backing_dev_info.dev));
+
+	rq->cmd_flags &= ~REQ_ELVPRIV;
+	rq->elv.icq = NULL;
+
+	spin_lock_irq(q->queue_lock);
+	rl->elvpriv--;
+	spin_unlock_irq(q->queue_lock);
+	goto out;
+
 fail_alloc:
 	/*
 	 * Allocation failed presumably due to memory.  Undo anything we
```
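To make the control-flow change easier to follow outside the kernel tree, here is a minimal, self-contained C sketch of the pattern the patch introduces: the request body comes from an allocation that is expected to make forward progress (a mempool in the kernel), while the elevator-private ("aux") data is allocated best-effort, and the request is degraded rather than failed when that allocation cannot be satisfied. All names and types below are illustrative stand-ins, not the kernel's actual request or elevator API.

```c
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins -- not the kernel's struct request or elevator types. */
struct aux_data {
	int sched_cookie;		/* pretend scheduler-private state */
};

struct request {
	unsigned int flags;
	struct aux_data *aux;		/* optional, scheduler-private */
};

#define REQ_ELVPRIV	(1u << 0)	/* "this request carries elevator data" */

/*
 * In the kernel the request body comes from a mempool, so this allocation
 * is guaranteed to make forward progress eventually; plain calloc() stands
 * in for it here.
 */
static struct request *alloc_request_body(void)
{
	return calloc(1, sizeof(struct request));
}

/*
 * Sketch of the patch's idea: if the auxiliary (elevator-private) data
 * cannot be allocated, clear REQ_ELVPRIV and hand back a degraded request
 * instead of failing the whole allocation and stalling IO.
 */
static struct request *get_request_sketch(unsigned int flags)
{
	struct request *rq = alloc_request_body();

	if (!rq)
		return NULL;	/* only the guaranteed path may fail hard */

	rq->flags = flags;
	if (flags & REQ_ELVPRIV) {
		rq->aux = calloc(1, sizeof(*rq->aux));
		if (!rq->aux) {
			/* Degrade instead of failing: scheduling suffers, IO does not stall. */
			fprintf(stderr, "aux data allocation failed, degrading request\n");
			rq->flags &= ~REQ_ELVPRIV;
		}
	}
	return rq;
}

int main(void)
{
	struct request *rq = get_request_sketch(REQ_ELVPRIV);

	if (!rq)
		return 1;

	printf("got request, elvpriv=%s\n",
	       (rq->flags & REQ_ELVPRIV) ? "yes" : "no (degraded)");

	free(rq->aux);
	free(rq);
	return 0;
}
```

In the actual patch the degraded path additionally drops the rl->elvpriv accounting under the queue lock and emits a rate-limited warning, as shown under the fail_elvpriv label in the diff above.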
