From 613990cb391c74436384d63d12240221565011d5 Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Mon, 31 Aug 2015 14:30:35 +0530
Subject: gpu: nvgpu: implement per-channel watchdog

Implement per-channel watchdog/timer as per the rules below:
- start the timer while submitting first job on channel or if
  no timer is already running
- cancel the timer when job completes
- re-start the timer if there is any incomplete job left
  in the channel's queue
- trigger appropriate recovery method as part of timeout
  handling mechanism

Handle the timeout as described below:
- get timed out channel, and job data
- disable activity on all engines
- check if fence is really pending
- get information on failing engine
- if no engine is failing, just abort the channel
- if engine is failing, trigger the recovery

Also, add flag "ch_wdt_enabled" to enable/disable channel
watchdog mechanism. Watchdog can also be disabled using
global flag "timeouts_enabled".

Set the watchdog time to be 5s using macro
NVGPU_CHANNEL_WATCHDOG_DEFAULT_TIMEOUT_MS

Bug 200133289

Change-Id: I401cf14dd34a210bc429f31bd5216a361edf1237
Signed-off-by: Deepak Nibade
Reviewed-on: http://git-master/r/797072
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.h')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 2ea5b4be..70930291 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -38,6 +38,8 @@ struct gk20a_fence;
 #include "gr_gk20a.h"
 #include "fence_gk20a.h"
 
+#define NVGPU_CHANNEL_WATCHDOG_DEFAULT_TIMEOUT_MS 5000
+
 struct gpfifo {
 	u32 entry0;
 	u32 entry1;
@@ -70,6 +72,13 @@ struct channel_gk20a_job {
 	struct list_head list;
 };
 
+struct channel_gk20a_timeout {
+	struct delayed_work wq;
+	struct mutex lock;
+	bool initialized;
+	struct channel_gk20a_job *job;
+};
+
 struct channel_gk20a_poll_events {
 	struct mutex lock;
 	bool events_enabled;
@@ -126,6 +135,8 @@ struct channel_gk20a {
 	u32 timeout_accumulated_ms;
 	u32 timeout_gpfifo_get;
 
+	struct channel_gk20a_timeout timeout;
+
 	bool cmds_pending;
 	struct {
 		/* These fences should be accessed with submit_lock held. */
-- 
cgit v1.2.2