From ec011cd1ee2fb3389d96ba717ad549cd12c69db1 Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Mon, 7 Nov 2016 10:17:56 -0800 Subject: gpu: nvgpu: add device alarms Add event definitions for: - Clock alarm (target frequency not met) - Thermal alarm (temperature above threshold) - Power alarm (power above threshold) - GPU shut down Jira DNVGPU-186 Change-Id: I52edd44352ed0cba83033949272f41cc9e1c630f Signed-off-by: Thomas Fleury Reviewed-on: http://git-master/r/1249342 (cherry picked from commit 67a6681aade241ff24982771778f7e2193d1cd7f) Reviewed-on: http://git-master/r/1267157 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- include/uapi/linux/nvgpu.h | 77 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 0aeea4f0..746ae94a 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -126,6 +126,10 @@ struct nvgpu_gpu_zbc_query_table_args { #define NVGPU_GPU_FLAGS_SUPPORT_GET_POWER (1ULL << 12) /* NVGPU_GPU_IOCTL_GET_TEMPERATURE is available */ #define NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE (1ULL << 13) +/* NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT is available */ +#define NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT (1ULL << 14) +/* NVGPU_GPU_IOCTL_GET_EVENT_FD is available */ +#define NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS (1ULL << 15) struct nvgpu_gpu_characteristics { __u32 arch; @@ -223,6 +227,8 @@ struct nvgpu_gpu_characteristics { - If the last field is reserved/padding, it is not generally safe to repurpose the field in future revisions. */ + __s16 event_ioctl_nr_last; + __u16 pad[3]; }; struct nvgpu_gpu_get_characteristics { @@ -684,12 +690,18 @@ struct nvgpu_gpu_clk_set_info_args { __s32 completion_fd; }; -struct nvgpu_gpu_clk_get_event_fd_args { +struct nvgpu_gpu_get_event_fd_args { /* in: Flags (not currently used). */ __u32 flags; - /* out: File descriptor for events, i.e. any clock update. */ + /* out: File descriptor for events, i.e. clock update. + * On successful polling of this event_fd, application is + * expected to read status (nvgpu_gpu_event_info), + * which provides detailed event information + * For a poll operation, alarms will be reported with POLLPRI, + * and GPU shutdown will be reported with POLLHUP. + */ __s32 event_fd; }; @@ -815,8 +827,8 @@ struct nvgpu_gpu_get_temperature_args { _IOWR(NVGPU_GPU_IOCTL_MAGIC, 30, struct nvgpu_gpu_clk_get_info_args) #define NVGPU_GPU_IOCTL_CLK_SET_INFO \ _IOWR(NVGPU_GPU_IOCTL_MAGIC, 31, struct nvgpu_gpu_clk_set_info_args) -#define NVGPU_GPU_IOCTL_CLK_GET_EVENT_FD \ - _IOWR(NVGPU_GPU_IOCTL_MAGIC, 32, struct nvgpu_gpu_clk_get_event_fd_args) +#define NVGPU_GPU_IOCTL_GET_EVENT_FD \ + _IOWR(NVGPU_GPU_IOCTL_MAGIC, 32, struct nvgpu_gpu_get_event_fd_args) #define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \ _IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \ struct nvgpu_gpu_get_memory_state_args) @@ -835,6 +847,63 @@ struct nvgpu_gpu_get_temperature_args { #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) +/* + * Event session + * + * NVGPU_GPU_IOCTL_GET_EVENT_FD opens an event session. + * Below ioctls can be used on these sessions fds. + */ +#define NVGPU_EVENT_IOCTL_MAGIC 'E' + +/* Normal events (POLLIN) */ +/* Event associated to a VF update */ +#define NVGPU_GPU_EVENT_VF_UPDATE 0 + +/* Recoverable alarms (POLLPRI) */ +/* Alarm when target frequency on any session is not possible */ +#define NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE 2 +/* Alarm when target frequency on current session is not possible */ +#define NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE 3 +/* Alarm when Clock Arbiter failed */ +#define NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED 4 +/* Alarm when VF table update failed */ +#define NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED 5 +/* Alarm on thermal condition */ +#define NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD 6 +/* Alarm on power condition */ +#define NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD 7 + +/* Non recoverable alarm (POLLUP) */ +/* Alarm on GPU shutdown/fall from bus */ +#define NVGPU_GPU_EVENT_ALARM_GPU_LOST 8 + +struct nvgpu_gpu_event_info { + __u32 event_id; /* NVGPU_GPU_EVENT_* */ + __u32 reserved; + __u64 timestamp; /* GPU timestamp */ +}; + +struct nvgpu_gpu_set_event_filter_args { + + /* in: Flags (not currently used). */ + __u32 flags; + + /* in: Size of event filter in 32-bit words */ + __u32 size; + + /* in: Address of buffer containing bit mask of events. + * Bit #n is set if event #n should be monitored. + */ + __u64 buffer; +}; + +#define NVGPU_EVENT_IOCTL_SET_FILTER \ + _IOW(NVGPU_EVENT_IOCTL_MAGIC, 1, struct nvgpu_gpu_set_event_filter_args) +#define NVGPU_EVENT_IOCTL_LAST \ + _IOC_NR(NVGPU_EVENT_IOCTL_SET_FILTER) +#define NVGPU_EVENT_IOCTL_MAX_ARG_SIZE \ + sizeof(struct nvgpu_gpu_set_event_filter_args) + /* * /dev/nvhost-tsg-gpu device * -- cgit v1.2.2