From 53a9eceab74738a750e693301c5e5998aa18e89a Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Fri, 25 Nov 2016 15:55:19 +0530
Subject: gpu: nvgpu: fix deadlock between clean up and timeout worker

If a job completes right around the timeout boundary, it is possible that we launch both the clean-up worker and the timeout worker for the same job.

The clean-up worker then tries to cancel the timeout worker, while the timeout worker tries to wait for the clean-up to finish. This leads to a deadlock with the stacks below.

stack 1 (timeout worker):
[] cancel_delayed_work_sync+0x10/0x18
[] gk20a_channel_cancel_job_clean_up+0x20/0x44
[] gk20a_channel_abort_clean_up+0x34/0x31c
[] gk20a_channel_abort+0xb4/0xc0
[] gk20a_fifo_recover_ch+0x9c/0xec
[] gk20a_fifo_force_reset_ch+0xdc/0xf8
[] gk20a_channel_timeout_handler+0xf8/0x128

stack 2 (clean-up worker):
[] cancel_delayed_work_sync+0x10/0x18
[] gk20a_channel_timeout_stop+0x40/0x60
[] gk20a_channel_clean_up_jobs+0x7c/0x238

To fix this, cancel the timeout worker in gk20a_channel_update() itself instead of cancelling it in gk20a_channel_clean_up_jobs().

Bug 200246829

Change-Id: Idef9de3cae29668f4e25beb564422cf2e3736182
Signed-off-by: Deepak Nibade
Reviewed-on: http://git-master/r/1259963
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 8b5a1a9e..e487e079 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -2218,7 +2218,6 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
platform = gk20a_get_platform(g->dev); gk20a_channel_cancel_job_clean_up(c, false); - gk20a_channel_timeout_stop(c); while (1) { bool completed;
@@ -2244,8 +2243,6 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
break; } - gk20a_channel_timeout_stop(c); - WARN_ON(!c->sync); if (c->sync) {
@@ -2317,6 +2314,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
} trace_gk20a_channel_update(c->hw_chid); + gk20a_channel_timeout_stop(c); gk20a_channel_schedule_job_clean_up(c); gk20a_channel_put(c);
--
cgit v1.2.2