diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2018-02-21 09:42:37 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-03-09 23:09:44 -0500 |
commit | cb6ed949e272f8ad753bf4ab1c0d20c35f31498b (patch) | |
tree | 16d0acad2430e77f9241abe93fae61937e317373 /drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |
parent | 4f9368522ea18e3734798d2032b21c58dbb93a04 (diff) |
gpu: nvgpu: support per-channel wdt timeouts
Replace the padding in nvgpu_channel_wdt_args with a timeout value in
milliseconds, and add NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT to
signify the existence of this new field. When the new flag is included
in the value of wdt_status, the field is used to set a per-channel
timeout to override the per-GPU default.
Add NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP to disable the long debug
dump when a timed-out channel gets recovered by the watchdog. Printing
the dump to a serial console can easily take several seconds. (Note that
a separate flag, NVGPU_TIMEOUT_FLAG_DISABLE_DUMP, exists as well for the
ctxsw timeout used with NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX.)
The behaviour of NVGPU_IOCTL_CHANNEL_WDT is changed so that either
NVGPU_IOCTL_CHANNEL_ENABLE_WDT or NVGPU_IOCTL_CHANNEL_DISABLE_WDT has to
be set. The old behaviour was that other values were silently ignored.
The usage of the global default debugfs-controlled ch_wdt_timeout_ms is
changed so that its value takes effect only for newly opened channels
instead of in real time. Also, a zero value no longer means that the
watchdog is disabled, since there is a separate flag for that.
gk20a_fifo_recover_tsg used to ignore the value of "verbose" when no
engines were found. Correct this.
Bug 1982826
Bug 1985845
Jira NVGPU-73
Change-Id: Iea6213a646a66cb7c631ed7d7c91d8c2ba8a92a4
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1510898
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index edb645b5..947b8913 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -96,11 +96,17 @@ struct channel_gk20a_joblist { | |||
96 | }; | 96 | }; |
97 | 97 | ||
98 | struct channel_gk20a_timeout { | 98 | struct channel_gk20a_timeout { |
99 | /* lock protects the running timer state */ | ||
99 | struct nvgpu_raw_spinlock lock; | 100 | struct nvgpu_raw_spinlock lock; |
100 | struct nvgpu_timeout timer; | 101 | struct nvgpu_timeout timer; |
101 | bool running; | 102 | bool running; |
102 | u32 gp_get; | 103 | u32 gp_get; |
103 | u64 pb_get; | 104 | u64 pb_get; |
105 | |||
106 | /* lock not needed */ | ||
107 | u32 limit_ms; | ||
108 | bool enabled; | ||
109 | bool debug_dump; | ||
104 | }; | 110 | }; |
105 | 111 | ||
106 | /* | 112 | /* |
@@ -167,7 +173,6 @@ struct channel_gk20a { | |||
167 | struct nvgpu_semaphore_int *hw_sema; | 173 | struct nvgpu_semaphore_int *hw_sema; |
168 | 174 | ||
169 | int chid; | 175 | int chid; |
170 | bool wdt_enabled; | ||
171 | nvgpu_atomic_t bound; | 176 | nvgpu_atomic_t bound; |
172 | bool vpr; | 177 | bool vpr; |
173 | bool deterministic; | 178 | bool deterministic; |
@@ -203,7 +208,9 @@ struct channel_gk20a { | |||
203 | u32 timeout_accumulated_ms; | 208 | u32 timeout_accumulated_ms; |
204 | u32 timeout_gpfifo_get; | 209 | u32 timeout_gpfifo_get; |
205 | 210 | ||
211 | /* kernel watchdog to kill stuck jobs */ | ||
206 | struct channel_gk20a_timeout timeout; | 212 | struct channel_gk20a_timeout timeout; |
213 | |||
207 | /* for job cleanup handling in the background worker */ | 214 | /* for job cleanup handling in the background worker */ |
208 | struct nvgpu_list_node worker_item; | 215 | struct nvgpu_list_node worker_item; |
209 | 216 | ||