diff options
author | NeilBrown <neilb@suse.de> | 2015-02-12 23:49:17 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-02-18 08:27:44 -0500 |
commit | 9cff8adeaa34b5d2802f03f89803da57856b3b72 (patch) | |
tree | b23f52785c2f1fef1577a4cdf238389bec7e63c1 /kernel | |
parent | bc9560155f4063bbc9be71bd69d6726d41b47653 (diff) |
sched: Prevent recursion in io_schedule()
io_schedule() calls blk_flush_plug() which, depending on the
contents of current->plug, can initiate arbitrary blk-io requests.
Note that this contrasts with blk_schedule_flush_plug() which requires
all non-trivial work to be handed off to a separate thread.
This makes it possible for io_schedule() to recurse, and initiating
block requests could possibly call mempool_alloc() which, in times of
memory pressure, uses io_schedule().
Apart from any stack usage issues, io_schedule() will not behave
correctly when called recursively as delayacct_blkio_start() does
not allow for repeated calls.
So:
- use ->in_iowait to detect recursion. Set it earlier, and restore
it to the old value.
- move the call to "raw_rq" after the call to blk_flush_plug().
As this is some sort of per-cpu thing, we want some chance that
we are on the right CPU.
- When io_schedule() is called recursively, use blk_schedule_flush_plug()
which cannot further recurse.
- as this makes io_schedule() a lot more complex and as io_schedule()
must match io_schedule_timeout(), put all the changes in io_schedule_timeout()
and make io_schedule a simple wrapper for that.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[ Moved the now rudimentary io_schedule() into sched.h. ]
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Tony Battersby <tonyb@cybernetics.com>
Link: http://lkml.kernel.org/r/20150213162600.059fffb2@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/core.c | 31 |
1 files changed, 12 insertions, 19 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c314000f5e52..daaea922f482 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -4358,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to); | |||
4358 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 4358 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
4359 | * that process accounting knows that this is a task in IO wait state. | 4359 | * that process accounting knows that this is a task in IO wait state. |
4360 | */ | 4360 | */ |
4361 | void __sched io_schedule(void) | ||
4362 | { | ||
4363 | struct rq *rq = raw_rq(); | ||
4364 | |||
4365 | delayacct_blkio_start(); | ||
4366 | atomic_inc(&rq->nr_iowait); | ||
4367 | blk_flush_plug(current); | ||
4368 | current->in_iowait = 1; | ||
4369 | schedule(); | ||
4370 | current->in_iowait = 0; | ||
4371 | atomic_dec(&rq->nr_iowait); | ||
4372 | delayacct_blkio_end(); | ||
4373 | } | ||
4374 | EXPORT_SYMBOL(io_schedule); | ||
4375 | |||
4376 | long __sched io_schedule_timeout(long timeout) | 4361 | long __sched io_schedule_timeout(long timeout) |
4377 | { | 4362 | { |
4378 | struct rq *rq = raw_rq(); | 4363 | int old_iowait = current->in_iowait; |
4364 | struct rq *rq; | ||
4379 | long ret; | 4365 | long ret; |
4380 | 4366 | ||
4367 | current->in_iowait = 1; | ||
4368 | if (old_iowait) | ||
4369 | blk_schedule_flush_plug(current); | ||
4370 | else | ||
4371 | blk_flush_plug(current); | ||
4372 | |||
4381 | delayacct_blkio_start(); | 4373 | delayacct_blkio_start(); |
4374 | rq = raw_rq(); | ||
4382 | atomic_inc(&rq->nr_iowait); | 4375 | atomic_inc(&rq->nr_iowait); |
4383 | blk_flush_plug(current); | ||
4384 | current->in_iowait = 1; | ||
4385 | ret = schedule_timeout(timeout); | 4376 | ret = schedule_timeout(timeout); |
4386 | current->in_iowait = 0; | 4377 | current->in_iowait = old_iowait; |
4387 | atomic_dec(&rq->nr_iowait); | 4378 | atomic_dec(&rq->nr_iowait); |
4388 | delayacct_blkio_end(); | 4379 | delayacct_blkio_end(); |
4380 | |||
4389 | return ret; | 4381 | return ret; |
4390 | } | 4382 | } |
4383 | EXPORT_SYMBOL(io_schedule_timeout); | ||
4391 | 4384 | ||
4392 | /** | 4385 | /** |
4393 | * sys_sched_get_priority_max - return maximum RT priority. | 4386 | * sys_sched_get_priority_max - return maximum RT priority. |