author	Roman Penyaev <rpenyaev@suse.de>	2019-05-16 04:53:57 -0400
committer	Jens Axboe <axboe@kernel.dk>	2019-05-16 10:10:24 -0400
commit	2bbcd6d3b36a75a19be4917807f54ae32dd26aba (patch)
tree	d4d54ef12b24272d399f0699a37082c207dbf229
parent	c71ffb673cd9bb2ddc575ede9055f265b2535690 (diff)
io_uring: fix infinite wait in kthread_park() on io_finish_async()
This fixes a couple of races which lead to an infinite wait for park
completion, with the following backtraces:

  [20801.303319] Call Trace:
  [20801.303321]  ? __schedule+0x284/0x650
  [20801.303323]  schedule+0x33/0xc0
  [20801.303324]  schedule_timeout+0x1bc/0x210
  [20801.303326]  ? schedule+0x3d/0xc0
  [20801.303327]  ? schedule_timeout+0x1bc/0x210
  [20801.303329]  ? preempt_count_add+0x79/0xb0
  [20801.303330]  wait_for_completion+0xa5/0x120
  [20801.303331]  ? wake_up_q+0x70/0x70
  [20801.303333]  kthread_park+0x48/0x80
  [20801.303335]  io_finish_async+0x2c/0x70
  [20801.303336]  io_ring_ctx_wait_and_kill+0x95/0x180
  [20801.303338]  io_uring_release+0x1c/0x20
  [20801.303339]  __fput+0xad/0x210
  [20801.303341]  task_work_run+0x8f/0xb0
  [20801.303342]  exit_to_usermode_loop+0xa0/0xb0
  [20801.303343]  do_syscall_64+0xe0/0x100
  [20801.303349]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

  [20801.303380] Call Trace:
  [20801.303383]  ? __schedule+0x284/0x650
  [20801.303384]  schedule+0x33/0xc0
  [20801.303386]  io_sq_thread+0x38a/0x410
  [20801.303388]  ? __switch_to_asm+0x40/0x70
  [20801.303390]  ? wait_woken+0x80/0x80
  [20801.303392]  ? _raw_spin_lock_irqsave+0x17/0x40
  [20801.303394]  ? io_submit_sqes+0x120/0x120
  [20801.303395]  kthread+0x112/0x130
  [20801.303396]  ? kthread_create_on_node+0x60/0x60
  [20801.303398]  ret_from_fork+0x35/0x40

o kthread_park() waits for park completion, so the io_sq_thread() loop
  should check kthread_should_park() along with kthread_should_stop();
  otherwise, if kthread_park() is called before prepare_to_wait(), the
  following schedule() never returns:

  CPU#0                      CPU#1

  io_sq_thread_stop():       io_sq_thread():

                               while (!kthread_should_stop() && !ctx->sqo_stop) {

    ctx->sqo_stop = 1;
    kthread_park()

                                 prepare_to_wait();
                                 if (kthread_should_stop()) {
                                 }
                                 schedule();  <<< nobody checks the park flag,
                                              <<< so schedule() never returns

o if the ctx->sqo_stop flag is observed by the io_sq_thread() loop, it
  is quite possible that the kthread_should_park() check and the
  following kthread_parkme() are never reached, because kthread_park()
  has not yet been called; a few moments later it is called and waits
  there for park completion, which never happens, because the kthread
  has already exited:

  CPU#0                      CPU#1

  io_sq_thread_stop():       io_sq_thread():

    ctx->sqo_stop = 1;
                               while (!kthread_should_stop() && !ctx->sqo_stop) {
                                 <<< observe sqo_stop and exit the loop
                               }

                               if (kthread_should_park())
                                 kthread_parkme();  <<< never called, since
                                                    <<< never parked

    kthread_park()  <<< waits forever for park completion

With this patch we quit the loop only on the kthread_should_park()
check (kthread_park() is synchronous, so kthread_should_stop() is
never observed), and we drop the ->sqo_stop flag, since it is racy.
At the end of io_sq_thread() we unconditionally call kthread_parkme(),
since we've exited the loop because of the park flag.

Signed-off-by: Roman Penyaev <rpenyaev@suse.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
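[Editor's note] For reference, a minimal sketch of the parking handshake
the patch settles on; my_worker/stop_worker are hypothetical names for
illustration, not part of the patch. The loop exits only on the park
flag and then parks unconditionally, so the parent's kthread_park()
always completes, and a later kthread_stop() unparks the thread and
reaps it:

  #include <linux/kthread.h>
  #include <linux/sched.h>

  static int my_worker(void *data)
  {
  	/*
  	 * Exit on the park flag alone: kthread_park() is synchronous,
  	 * so kthread_should_stop() cannot be observed while the parent
  	 * is still waiting for the park to complete.
  	 */
  	while (!kthread_should_park()) {
  		/* ... do one unit of work ... */
  		cond_resched();
  	}

  	/*
  	 * Unconditionally park: the loop was left only because the
  	 * park flag is set, so the parent is waiting in kthread_park().
  	 */
  	kthread_parkme();

  	/* we were unparked by kthread_stop(); report exit status to it */
  	return 0;
  }

  static void stop_worker(struct task_struct *thread)
  {
  	kthread_park(thread);	/* returns once the thread has parked */
  	kthread_stop(thread);	/* unparks the thread, waits for exit */
  }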
-rw-r--r--	fs/io_uring.c	15
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ac0407693834..67d1aae349d7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,7 +231,6 @@ struct io_ring_ctx {
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
-	unsigned		sqo_stop;
 
 	struct {
 		/* CQ ring */
@@ -2015,7 +2014,7 @@ static int io_sq_thread(void *data)
 	set_fs(USER_DS);
 
 	timeout = inflight = 0;
-	while (!kthread_should_stop() && !ctx->sqo_stop) {
+	while (!kthread_should_park()) {
 		bool all_fixed, mm_fault = false;
 		int i;
 
@@ -2077,7 +2076,7 @@ static int io_sq_thread(void *data)
 		smp_mb();
 
 		if (!io_get_sqring(ctx, &sqes[0])) {
-			if (kthread_should_stop()) {
+			if (kthread_should_park()) {
 				finish_wait(&ctx->sqo_wait, &wait);
 				break;
 			}
@@ -2127,8 +2126,7 @@ static int io_sq_thread(void *data)
 		mmput(cur_mm);
 	}
 
-	if (kthread_should_park())
-		kthread_parkme();
+	kthread_parkme();
 
 	return 0;
 }
@@ -2260,8 +2258,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
-		ctx->sqo_stop = 1;
-		mb();
+		/*
+		 * The park is a bit of a work-around, without it we get
+		 * warning spews on shutdown with SQPOLL set and affinity
+		 * set to a single CPU.
+		 */
 		kthread_park(ctx->sqo_thread);
 		kthread_stop(ctx->sqo_thread);
 		ctx->sqo_thread = NULL;