summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
diff options
context:
space:
mode:
author: Konsta Holtta <kholtta@nvidia.com> 2018-02-21 09:42:37 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-03-09 23:09:44 -0500
commit: cb6ed949e272f8ad753bf4ab1c0d20c35f31498b (patch)
tree: 16d0acad2430e77f9241abe93fae61937e317373 /drivers/gpu/nvgpu/gk20a/channel_gk20a.h
parent: 4f9368522ea18e3734798d2032b21c58dbb93a04 (diff)
gpu: nvgpu: support per-channel wdt timeouts
Replace the padding in nvgpu_channel_wdt_args with a timeout value in milliseconds, and add NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT to signify the existence of this new field. When the new flag is included in the value of wdt_status, the field is used to set a per-channel timeout to override the per-GPU default.

Add NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP to disable the long debug dump when a timed-out channel gets recovered by the watchdog. Printing the dump to the serial console easily takes several seconds. (Note that there is also a separate NVGPU_TIMEOUT_FLAG_DISABLE_DUMP for the ctxsw timeout, used with NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX.)

The behaviour of NVGPU_IOCTL_CHANNEL_WDT is changed so that either NVGPU_IOCTL_CHANNEL_ENABLE_WDT or NVGPU_IOCTL_CHANNEL_DISABLE_WDT has to be set. The old behaviour was that other values were silently ignored.

The usage of the global default debugfs-controlled ch_wdt_timeout_ms is changed so that its value takes effect only for newly opened channels instead of in real time. Also, a zero value no longer means that the watchdog is disabled; there is a separate flag for that after all.

gk20a_fifo_recover_tsg used to ignore the value of "verbose" when no engines were found. Correct this.

Bug 1982826
Bug 1985845
Jira NVGPU-73

Change-Id: Iea6213a646a66cb7c631ed7d7c91d8c2ba8a92a4
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1510898
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.h')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 9
1 files changed, 8 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index edb645b5..947b8913 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -96,11 +96,17 @@ struct channel_gk20a_joblist {
96}; 96};
97 97
98struct channel_gk20a_timeout { 98struct channel_gk20a_timeout {
99 /* lock protects the running timer state */
99 struct nvgpu_raw_spinlock lock; 100 struct nvgpu_raw_spinlock lock;
100 struct nvgpu_timeout timer; 101 struct nvgpu_timeout timer;
101 bool running; 102 bool running;
102 u32 gp_get; 103 u32 gp_get;
103 u64 pb_get; 104 u64 pb_get;
105
106 /* lock not needed */
107 u32 limit_ms;
108 bool enabled;
109 bool debug_dump;
104}; 110};
105 111
106/* 112/*
@@ -167,7 +173,6 @@ struct channel_gk20a {
167 struct nvgpu_semaphore_int *hw_sema; 173 struct nvgpu_semaphore_int *hw_sema;
168 174
169 int chid; 175 int chid;
170 bool wdt_enabled;
171 nvgpu_atomic_t bound; 176 nvgpu_atomic_t bound;
172 bool vpr; 177 bool vpr;
173 bool deterministic; 178 bool deterministic;
@@ -203,7 +208,9 @@ struct channel_gk20a {
203 u32 timeout_accumulated_ms; 208 u32 timeout_accumulated_ms;
204 u32 timeout_gpfifo_get; 209 u32 timeout_gpfifo_get;
205 210
211 /* kernel watchdog to kill stuck jobs */
206 struct channel_gk20a_timeout timeout; 212 struct channel_gk20a_timeout timeout;
213
207 /* for job cleanup handling in the background worker */ 214 /* for job cleanup handling in the background worker */
208 struct nvgpu_list_node worker_item; 215 struct nvgpu_list_node worker_item;
209 216