From 0ce201e8de6a320b70f1f34d05202650b9b5a046 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Thu, 7 Jan 2016 14:31:12 +0530 Subject: gpu: nvgpu: stop timer on failing channel In gk20a_channel_timeout_handler(), the following deadlock scenario is possible: thread 1: - take the global lock g->ch_wdt_lock - identify the timed-out channel (as ch1) - check the engine status, which is stuck - identify the failing channel on the engine as ch2 - we need to trigger recovery with ch2 - as part of recovery, call channel_abort() for ch2 - in channel_abort(), we wait to cancel the timer wq - but the timer wq for ch2 never completes due to thread 2 thread 2: - ch2 has already timed out - to process it, we wait for the global lock g->ch_wdt_lock - this lock needs to be released by thread 1 To fix this, cancel the timer of ch2 (the failing channel on the engine) by clearing its timeout.initialized flag before triggering recovery on that channel. Bug 200164753 Change-Id: Idb42d01c8440a53f43cb5e87e41f1c283f7e8fcf Signed-off-by: Deepak Nibade Reviewed-on: http://git-master/r/929924 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index f0a700ac..2421307f 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -1700,9 +1700,14 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) if (!failing_ch) goto fail_enable_ctxsw; - if (failing_ch->hw_chid != ch->hw_chid) + if (failing_ch->hw_chid != ch->hw_chid) { gk20a_channel_timeout_start(ch, job); + mutex_lock(&failing_ch->timeout.lock); + failing_ch->timeout.initialized = false; + mutex_unlock(&failing_ch->timeout.lock); + } + gk20a_fifo_recover(g, BIT(engine_id), failing_ch->hw_chid, is_tsg, true, failing_ch->timeout_debug_dump); -- cgit v1.2.2