diff options
| author | NeilBrown <neilb@suse.de> | 2015-02-12 23:49:17 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2015-02-18 08:27:44 -0500 |
| commit | 9cff8adeaa34b5d2802f03f89803da57856b3b72 (patch) | |
| tree | b23f52785c2f1fef1577a4cdf238389bec7e63c1 | |
| parent | bc9560155f4063bbc9be71bd69d6726d41b47653 (diff) | |
sched: Prevent recursion in io_schedule()
io_schedule() calls blk_flush_plug() which, depending on the
contents of current->plug, can initiate arbitrary blk-io requests.
Note that this contrasts with blk_schedule_flush_plug() which requires
all non-trivial work to be handed off to a separate thread.
This makes it possible for io_schedule() to recurse, and initiating
block requests could possibly call mempool_alloc() which, in times of
memory pressure, uses io_schedule().
Apart from any stack usage issues, io_schedule() will not behave
correctly when called recursively as delayacct_blkio_start() does
not allow for repeated calls.
So:
- use ->in_iowait to detect recursion. Set it earlier, and restore
it to the old value.
- move the call to "raw_rq" after the call to blk_flush_plug().
As this is some sort of per-cpu thing, we want some chance that
we are on the right CPU.
- When io_schedule() is called recursively, use blk_schedule_flush_plug()
which cannot further recurse.
- as this makes io_schedule() a lot more complex and as io_schedule()
must match io_schedule_timeout(), put all the changes in io_schedule_timeout()
and make io_schedule() a simple wrapper for that.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[ Moved the now rudimentary io_schedule() into sched.h. ]
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Tony Battersby <tonyb@cybernetics.com>
Link: http://lkml.kernel.org/r/20150213162600.059fffb2@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
| -rw-r--r-- | include/linux/sched.h | 10 | ||||
| -rw-r--r-- | kernel/sched/core.c | 31 |
2 files changed, 19 insertions, 22 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 8db31ef98d2f..cb5cdc777c8a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *); | |||
| 363 | */ | 363 | */ |
| 364 | extern void show_stack(struct task_struct *task, unsigned long *sp); | 364 | extern void show_stack(struct task_struct *task, unsigned long *sp); |
| 365 | 365 | ||
| 366 | void io_schedule(void); | ||
| 367 | long io_schedule_timeout(long timeout); | ||
| 368 | |||
| 369 | extern void cpu_init (void); | 366 | extern void cpu_init (void); |
| 370 | extern void trap_init(void); | 367 | extern void trap_init(void); |
| 371 | extern void update_process_times(int user); | 368 | extern void update_process_times(int user); |
| @@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout); | |||
| 422 | asmlinkage void schedule(void); | 419 | asmlinkage void schedule(void); |
| 423 | extern void schedule_preempt_disabled(void); | 420 | extern void schedule_preempt_disabled(void); |
| 424 | 421 | ||
| 422 | extern long io_schedule_timeout(long timeout); | ||
| 423 | |||
| 424 | static inline void io_schedule(void) | ||
| 425 | { | ||
| 426 | io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); | ||
| 427 | } | ||
| 428 | |||
| 425 | struct nsproxy; | 429 | struct nsproxy; |
| 426 | struct user_namespace; | 430 | struct user_namespace; |
| 427 | 431 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c314000f5e52..daaea922f482 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -4358,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to); | |||
| 4358 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 4358 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
| 4359 | * that process accounting knows that this is a task in IO wait state. | 4359 | * that process accounting knows that this is a task in IO wait state. |
| 4360 | */ | 4360 | */ |
| 4361 | void __sched io_schedule(void) | ||
| 4362 | { | ||
| 4363 | struct rq *rq = raw_rq(); | ||
| 4364 | |||
| 4365 | delayacct_blkio_start(); | ||
| 4366 | atomic_inc(&rq->nr_iowait); | ||
| 4367 | blk_flush_plug(current); | ||
| 4368 | current->in_iowait = 1; | ||
| 4369 | schedule(); | ||
| 4370 | current->in_iowait = 0; | ||
| 4371 | atomic_dec(&rq->nr_iowait); | ||
| 4372 | delayacct_blkio_end(); | ||
| 4373 | } | ||
| 4374 | EXPORT_SYMBOL(io_schedule); | ||
| 4375 | |||
| 4376 | long __sched io_schedule_timeout(long timeout) | 4361 | long __sched io_schedule_timeout(long timeout) |
| 4377 | { | 4362 | { |
| 4378 | struct rq *rq = raw_rq(); | 4363 | int old_iowait = current->in_iowait; |
| 4364 | struct rq *rq; | ||
| 4379 | long ret; | 4365 | long ret; |
| 4380 | 4366 | ||
| 4367 | current->in_iowait = 1; | ||
| 4368 | if (old_iowait) | ||
| 4369 | blk_schedule_flush_plug(current); | ||
| 4370 | else | ||
| 4371 | blk_flush_plug(current); | ||
| 4372 | |||
| 4381 | delayacct_blkio_start(); | 4373 | delayacct_blkio_start(); |
| 4374 | rq = raw_rq(); | ||
| 4382 | atomic_inc(&rq->nr_iowait); | 4375 | atomic_inc(&rq->nr_iowait); |
| 4383 | blk_flush_plug(current); | ||
| 4384 | current->in_iowait = 1; | ||
| 4385 | ret = schedule_timeout(timeout); | 4376 | ret = schedule_timeout(timeout); |
| 4386 | current->in_iowait = 0; | 4377 | current->in_iowait = old_iowait; |
| 4387 | atomic_dec(&rq->nr_iowait); | 4378 | atomic_dec(&rq->nr_iowait); |
| 4388 | delayacct_blkio_end(); | 4379 | delayacct_blkio_end(); |
| 4380 | |||
| 4389 | return ret; | 4381 | return ret; |
| 4390 | } | 4382 | } |
| 4383 | EXPORT_SYMBOL(io_schedule_timeout); | ||
| 4391 | 4384 | ||
| 4392 | /** | 4385 | /** |
| 4393 | * sys_sched_get_priority_max - return maximum RT priority. | 4386 | * sys_sched_get_priority_max - return maximum RT priority. |
