From cb6ed949e272f8ad753bf4ab1c0d20c35f31498b Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Wed, 21 Feb 2018 16:42:37 +0200 Subject: gpu: nvgpu: support per-channel wdt timeouts Replace the padding in nvgpu_channel_wdt_args with a timeout value in milliseconds, and add NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT to signify the existence of this new field. When the new flag is included in the value of wdt_status, the field is used to set a per-channel timeout to override the per-GPU default. Add NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP to disable the long debug dump when a timed out channel gets recovered by the watchdog. Printing the dump to serial console takes easily several seconds. (Note that there is NVGPU_TIMEOUT_FLAG_DISABLE_DUMP about ctxsw timeout separately for NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX as well.) The behaviour of NVGPU_IOCTL_CHANNEL_WDT is changed so that either NVGPU_IOCTL_CHANNEL_ENABLE_WDT or NVGPU_IOCTL_CHANNEL_DISABLE_WDT has to be set. The old behaviour was that other values were silently ignored. The usage of the global default debugfs-controlled ch_wdt_timeout_ms is changed so that its value takes effect only for newly opened channels instead of in realtime. Also, zero value no longer means that the watchdog is disabled; there is a separate flag for that after all. gk20a_fifo_recover_tsg used to ignore the value of "verbose" when no engines were found. Correct this. Bug 1982826 Bug 1985845 Jira NVGPU-73 Change-Id: Iea6213a646a66cb7c631ed7d7c91d8c2ba8a92a4 Signed-off-by: Konsta Holtta Reviewed-on: https://git-master.nvidia.com/r/1510898 Reviewed-by: mobile promotions Tested-by: mobile promotions --- include/uapi/linux/nvgpu.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index cf75595a..8a578102 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -1577,13 +1577,15 @@ struct nvgpu_cycle_stats_snapshot_args { #define NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH 1 #define NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH 2 -/* disable watchdog per-channel */ +/* configure watchdog per-channel */ struct nvgpu_channel_wdt_args { __u32 wdt_status; - __u32 padding; + __u32 timeout_ms; }; -#define NVGPU_IOCTL_CHANNEL_DISABLE_WDT 1 -#define NVGPU_IOCTL_CHANNEL_ENABLE_WDT 2 +#define NVGPU_IOCTL_CHANNEL_DISABLE_WDT (1 << 0) +#define NVGPU_IOCTL_CHANNEL_ENABLE_WDT (1 << 1) +#define NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT (1 << 2) +#define NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP (1 << 3) /* * Interleaving channels in a runlist is an approach to improve -- cgit v1.2.2