summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorThomas Fleury <tfleury@nvidia.com>2016-11-07 13:17:56 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2016-12-15 13:15:00 -0500
commitec011cd1ee2fb3389d96ba717ad549cd12c69db1 (patch)
tree5d55b26e9b2e7bb2949b20feb50f3828e7dfcc99 /include
parent0250221955bca63ad1ff7e3474e4886132cf8445 (diff)
gpu: nvgpu: add device alarms
Add event definitions for:
- Clock alarm (target frequency not met)
- Thermal alarm (temperature above threshold)
- Power alarm (power above threshold)
- GPU shut down

Jira DNVGPU-186

Change-Id: I52edd44352ed0cba83033949272f41cc9e1c630f
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1249342
(cherry picked from commit 67a6681aade241ff24982771778f7e2193d1cd7f)
Reviewed-on: http://git-master/r/1267157
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'include')
-rw-r--r--include/uapi/linux/nvgpu.h77
1 files changed, 73 insertions, 4 deletions
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 0aeea4f0..746ae94a 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -126,6 +126,10 @@ struct nvgpu_gpu_zbc_query_table_args {
126#define NVGPU_GPU_FLAGS_SUPPORT_GET_POWER (1ULL << 12) 126#define NVGPU_GPU_FLAGS_SUPPORT_GET_POWER (1ULL << 12)
127/* NVGPU_GPU_IOCTL_GET_TEMPERATURE is available */ 127/* NVGPU_GPU_IOCTL_GET_TEMPERATURE is available */
128#define NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE (1ULL << 13) 128#define NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE (1ULL << 13)
129/* NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT is available */
130#define NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT (1ULL << 14)
131/* NVGPU_GPU_IOCTL_GET_EVENT_FD is available */
132#define NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS (1ULL << 15)
129 133
130struct nvgpu_gpu_characteristics { 134struct nvgpu_gpu_characteristics {
131 __u32 arch; 135 __u32 arch;
@@ -223,6 +227,8 @@ struct nvgpu_gpu_characteristics {
223 - If the last field is reserved/padding, it is not 227 - If the last field is reserved/padding, it is not
224 generally safe to repurpose the field in future revisions. 228 generally safe to repurpose the field in future revisions.
225 */ 229 */
230 __s16 event_ioctl_nr_last;
231 __u16 pad[3];
226}; 232};
227 233
228struct nvgpu_gpu_get_characteristics { 234struct nvgpu_gpu_get_characteristics {
@@ -684,12 +690,18 @@ struct nvgpu_gpu_clk_set_info_args {
684 __s32 completion_fd; 690 __s32 completion_fd;
685}; 691};
686 692
687struct nvgpu_gpu_clk_get_event_fd_args { 693struct nvgpu_gpu_get_event_fd_args {
688 694
689 /* in: Flags (not currently used). */ 695 /* in: Flags (not currently used). */
690 __u32 flags; 696 __u32 flags;
691 697
692 /* out: File descriptor for events, i.e. any clock update. */ 698 /* out: File descriptor for events, i.e. clock update.
699 * On successful polling of this event_fd, application is
700 * expected to read status (nvgpu_gpu_event_info),
701 * which provides detailed event information.
702 * For a poll operation, alarms will be reported with POLLPRI,
703 * and GPU shutdown will be reported with POLLHUP.
704 */
693 __s32 event_fd; 705 __s32 event_fd;
694}; 706};
695 707
@@ -815,8 +827,8 @@ struct nvgpu_gpu_get_temperature_args {
815 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 30, struct nvgpu_gpu_clk_get_info_args) 827 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 30, struct nvgpu_gpu_clk_get_info_args)
816#define NVGPU_GPU_IOCTL_CLK_SET_INFO \ 828#define NVGPU_GPU_IOCTL_CLK_SET_INFO \
817 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 31, struct nvgpu_gpu_clk_set_info_args) 829 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 31, struct nvgpu_gpu_clk_set_info_args)
818#define NVGPU_GPU_IOCTL_CLK_GET_EVENT_FD \ 830#define NVGPU_GPU_IOCTL_GET_EVENT_FD \
819 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 32, struct nvgpu_gpu_clk_get_event_fd_args) 831 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 32, struct nvgpu_gpu_get_event_fd_args)
820#define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \ 832#define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \
821 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \ 833 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \
822 struct nvgpu_gpu_get_memory_state_args) 834 struct nvgpu_gpu_get_memory_state_args)
@@ -836,6 +848,63 @@ struct nvgpu_gpu_get_temperature_args {
836 sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) 848 sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
837 849
838/* 850/*
851 * Event session
852 *
853 * NVGPU_GPU_IOCTL_GET_EVENT_FD opens an event session.
854 * The ioctls below can be used on these session fds.
855 */
856#define NVGPU_EVENT_IOCTL_MAGIC 'E'
857
858/* Normal events (POLLIN) */
859/* Event associated with a VF update */
860#define NVGPU_GPU_EVENT_VF_UPDATE 0
861
862/* Recoverable alarms (POLLPRI) */
863/* Alarm when target frequency on any session is not possible */
864#define NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE 2
865/* Alarm when target frequency on current session is not possible */
866#define NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE 3
867/* Alarm when Clock Arbiter failed */
868#define NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED 4
869/* Alarm when VF table update failed */
870#define NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED 5
871/* Alarm on thermal condition */
872#define NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD 6
873/* Alarm on power condition */
874#define NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD 7
875
876/* Non-recoverable alarm (POLLHUP) */
877/* Alarm on GPU shutdown/fall from bus */
878#define NVGPU_GPU_EVENT_ALARM_GPU_LOST 8
879
880struct nvgpu_gpu_event_info {
881 __u32 event_id; /* NVGPU_GPU_EVENT_* */
882 __u32 reserved;
883 __u64 timestamp; /* GPU timestamp */
884};
885
886struct nvgpu_gpu_set_event_filter_args {
887
888 /* in: Flags (not currently used). */
889 __u32 flags;
890
891 /* in: Size of event filter in 32-bit words */
892 __u32 size;
893
894 /* in: Address of buffer containing bit mask of events.
895 * Bit #n is set if event #n should be monitored.
896 */
897 __u64 buffer;
898};
899
900#define NVGPU_EVENT_IOCTL_SET_FILTER \
901 _IOW(NVGPU_EVENT_IOCTL_MAGIC, 1, struct nvgpu_gpu_set_event_filter_args)
902#define NVGPU_EVENT_IOCTL_LAST \
903 _IOC_NR(NVGPU_EVENT_IOCTL_SET_FILTER)
904#define NVGPU_EVENT_IOCTL_MAX_ARG_SIZE \
905 sizeof(struct nvgpu_gpu_set_event_filter_args)
906
907/*
839 * /dev/nvhost-tsg-gpu device 908 * /dev/nvhost-tsg-gpu device
840 * 909 *
841 * Opening a '/dev/nvhost-tsg-gpu' device node creates a way to 910 * Opening a '/dev/nvhost-tsg-gpu' device node creates a way to