author     Roman Penyaev <rpenyaev@suse.de>          2019-05-16 04:53:57 -0400
committer  Jens Axboe <axboe@kernel.dk>              2019-05-16 10:10:24 -0400
commit     2bbcd6d3b36a75a19be4917807f54ae32dd26aba
tree       d4d54ef12b24272d399f0699a37082c207dbf229
parent     c71ffb673cd9bb2ddc575ede9055f265b2535690
io_uring: fix infinite wait in kthread_park() on io_finish_async()
This fixes a couple of races which lead to an infinite wait for park completion,
with the following backtraces:
[20801.303319] Call Trace:
[20801.303321] ? __schedule+0x284/0x650
[20801.303323] schedule+0x33/0xc0
[20801.303324] schedule_timeout+0x1bc/0x210
[20801.303326] ? schedule+0x3d/0xc0
[20801.303327] ? schedule_timeout+0x1bc/0x210
[20801.303329] ? preempt_count_add+0x79/0xb0
[20801.303330] wait_for_completion+0xa5/0x120
[20801.303331] ? wake_up_q+0x70/0x70
[20801.303333] kthread_park+0x48/0x80
[20801.303335] io_finish_async+0x2c/0x70
[20801.303336] io_ring_ctx_wait_and_kill+0x95/0x180
[20801.303338] io_uring_release+0x1c/0x20
[20801.303339] __fput+0xad/0x210
[20801.303341] task_work_run+0x8f/0xb0
[20801.303342] exit_to_usermode_loop+0xa0/0xb0
[20801.303343] do_syscall_64+0xe0/0x100
[20801.303349] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[20801.303380] Call Trace:
[20801.303383] ? __schedule+0x284/0x650
[20801.303384] schedule+0x33/0xc0
[20801.303386] io_sq_thread+0x38a/0x410
[20801.303388] ? __switch_to_asm+0x40/0x70
[20801.303390] ? wait_woken+0x80/0x80
[20801.303392] ? _raw_spin_lock_irqsave+0x17/0x40
[20801.303394] ? io_submit_sqes+0x120/0x120
[20801.303395] kthread+0x112/0x130
[20801.303396] ? kthread_create_on_node+0x60/0x60
[20801.303398] ret_from_fork+0x35/0x40
o kthread_park() waits for park completion, so the io_sq_thread() loop
should check kthread_should_park() along with kthread_should_stop();
otherwise, if kthread_park() is called before prepare_to_wait(),
the following schedule() never returns (a sketch of the safe ordering
follows the diagram):
      CPU#0                          CPU#1

      io_sq_thread_stop():           io_sq_thread():

                                        while (!kthread_should_stop() && !ctx->sqo_stop) {

         ctx->sqo_stop = 1;
         kthread_park()

                                            prepare_to_wait();
                                            if (kthread_should_stop()) {
                                            }
                                            schedule();   <<< nobody checks the park flag,
                                                          <<< so we schedule and never return
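The safe ordering is to re-check kthread_should_park() after prepare_to_wait():
once the task state is set, a park request is either seen by that check or
delivered by the wakeup that kthread_park() issues. Below is a minimal, hedged
sketch of this pattern; poll_wq and poll_thread are placeholder names used only
for illustration, not the io_uring identifiers:

    /* Illustrative sketch only, not the actual io_uring code. */
    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/wait.h>

    static DECLARE_WAIT_QUEUE_HEAD(poll_wq);    /* placeholder wait queue */

    static int poll_thread(void *data)
    {
            DEFINE_WAIT(wait);

            while (!kthread_should_park()) {
                    /* ... look for work; if some was submitted, continue ... */

                    prepare_to_wait(&poll_wq, &wait, TASK_INTERRUPTIBLE);
                    if (kthread_should_park()) {
                            /* park was requested before we set the task state */
                            finish_wait(&poll_wq, &wait);
                            break;
                    }
                    schedule();             /* a later kthread_park() wakes us here */
                    finish_wait(&poll_wq, &wait);
            }

            kthread_parkme();       /* always complete the park handshake */
            return 0;
    }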
o if the ctx->sqo_stop flag is observed by the io_sq_thread() loop,
it is quite possible that the kthread_should_park() check and the
following kthread_parkme() are never called, because kthread_park()
has not been called yet; a few moments later it is called and
waits there for park completion, which never happens, because the
kthread has already exited (a stop-side sketch follows the diagram):
      CPU#0                          CPU#1

      io_sq_thread_stop():           io_sq_thread():

         ctx->sqo_stop = 1;
                                        while (!kthread_should_stop() && !ctx->sqo_stop) {
                                            <<< observes sqo_stop and exits the loop
                                        }
                                        if (kthread_should_park())
                                            kthread_parkme();  <<< never called, since the thread
                                                               <<< was never parked

         kthread_park()               <<< waits forever for park completion
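The hang on the stop side follows from kthread_park() blocking until the target
thread reaches kthread_parkme(): if the thread has already returned without
parking, that completion never fires. A minimal sketch of the stop path, with
sqo_task as a placeholder for the task_struct of the thread sketched above:

    /* Illustrative sketch only; sqo_task is a placeholder pointer. */
    #include <linux/kthread.h>

    static void stop_poll_thread(struct task_struct *sqo_task)
    {
            /*
             * kthread_park() only returns once the thread has called
             * kthread_parkme().  A thread that exits its loop and returns
             * without parking (the race above) leaves this call waiting
             * forever, which is exactly the hang being fixed.
             */
            kthread_park(sqo_task);
            kthread_stop(sqo_task);
    }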
In this patch we exit the loop only on the kthread_should_park() check
(kthread_park() is synchronous, so kthread_should_stop() is never observed
in the loop), and we drop the ->sqo_stop flag, since it is racy. At the end
of io_sq_thread() we unconditionally call kthread_parkme(), since we can only
have left the loop because of the park flag.
Signed-off-by: Roman Penyaev <rpenyaev@suse.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--   fs/io_uring.c   15
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ac0407693834..67d1aae349d7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,7 +231,6 @@ struct io_ring_ctx {
         struct task_struct      *sqo_thread;    /* if using sq thread polling */
         struct mm_struct        *sqo_mm;
         wait_queue_head_t       sqo_wait;
-        unsigned                sqo_stop;
 
         struct {
                 /* CQ ring */
@@ -2015,7 +2014,7 @@ static int io_sq_thread(void *data)
         set_fs(USER_DS);
 
         timeout = inflight = 0;
-        while (!kthread_should_stop() && !ctx->sqo_stop) {
+        while (!kthread_should_park()) {
                 bool all_fixed, mm_fault = false;
                 int i;
 
@@ -2077,7 +2076,7 @@ static int io_sq_thread(void *data)
                         smp_mb();
 
                         if (!io_get_sqring(ctx, &sqes[0])) {
-                                if (kthread_should_stop()) {
+                                if (kthread_should_park()) {
                                         finish_wait(&ctx->sqo_wait, &wait);
                                         break;
                                 }
@@ -2127,8 +2126,7 @@ static int io_sq_thread(void *data)
                 mmput(cur_mm);
         }
 
-        if (kthread_should_park())
-                kthread_parkme();
+        kthread_parkme();
 
         return 0;
 }
@@ -2260,8 +2258,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
         if (ctx->sqo_thread) {
-                ctx->sqo_stop = 1;
-                mb();
+                /*
+                 * The park is a bit of a work-around, without it we get
+                 * warning spews on shutdown with SQPOLL set and affinity
+                 * set to a single CPU.
+                 */
                 kthread_park(ctx->sqo_thread);
                 kthread_stop(ctx->sqo_thread);
                 ctx->sqo_thread = NULL;