diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2015-08-31 05:00:35 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-09-28 12:08:12 -0400 |
commit | 613990cb391c74436384d63d12240221565011d5 (patch) | |
tree | 27d7cd19bd84a6ce50fb579c5f6a08ada28ba5b7 /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |
parent | cb8c102131ec96767e01981dc9a9d26e30593a70 (diff) |
gpu: nvgpu: implement per-channel watchdog
Implement a per-channel watchdog/timer according to the rules below:
- start the timer while submitting first job on channel or if
no timer is already running
- cancel the timer when job completes
- re-start the timer if there is any incomplete job left
in the channel's queue
- trigger appropriate recovery method as part of timeout
handling mechanism
Handle the timeout as follows:
- get timed out channel, and job data
- disable activity on all engines
- check if fence is really pending
- get information on failing engine
- if no engine is failing, just abort the channel
- if engine is failing, trigger the recovery
Also, add flag "ch_wdt_enabled" to enable/disable channel
watchdog mechanism. Watchdog can also be disabled using
global flag "timeouts_enabled"
Set the watchdog time to be 5s using macro
NVGPU_CHANNEL_WATCHDOG_DEFAULT_TIMEOUT_MS
Bug 200133289
Change-Id: I401cf14dd34a210bc429f31bd5216a361edf1237
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/797072
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 069ea82a..f736fe8c 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -852,7 +852,7 @@ static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, | |||
852 | return verbose; | 852 | return verbose; |
853 | } | 853 | } |
854 | 854 | ||
855 | static bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, | 855 | bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, |
856 | struct channel_gk20a *ch) | 856 | struct channel_gk20a *ch) |
857 | { | 857 | { |
858 | gk20a_err(dev_from_gk20a(g), | 858 | gk20a_err(dev_from_gk20a(g), |
@@ -861,7 +861,7 @@ static bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g, | |||
861 | return gk20a_fifo_set_ctx_mmu_error(g, ch); | 861 | return gk20a_fifo_set_ctx_mmu_error(g, ch); |
862 | } | 862 | } |
863 | 863 | ||
864 | static bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, | 864 | bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, |
865 | struct tsg_gk20a *tsg) | 865 | struct tsg_gk20a *tsg) |
866 | { | 866 | { |
867 | bool ret = true; | 867 | bool ret = true; |
@@ -883,7 +883,7 @@ static bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, | |||
883 | return ret; | 883 | return ret; |
884 | } | 884 | } |
885 | 885 | ||
886 | static void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid) | 886 | void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid) |
887 | { | 887 | { |
888 | struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; | 888 | struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; |
889 | struct channel_gk20a *ch; | 889 | struct channel_gk20a *ch; |