From a807cf20419af737a79a3d0c7fcc1068ac6b724a Mon Sep 17 00:00:00 2001
From: David Li <davli@nvidia.com>
Date: Thu, 26 Apr 2018 02:00:01 -0700
Subject: gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST

Add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule runlist,
and optionally check host and FECS status to preempt pending load of
context not belonging to the calling channel on GR engine during context
switch.
This should be called immediately after a submit to decrease worst case
submit to start latency for high interleave channel.
There is less than 0.002% chance that the ioctl blocks up to couple
miliseconds due to race condition of FECS status changing while being read.
For GV11B it will always preempt pending load of unwanted context since
there is no chance that ioctl blocks due to race condition.
Also fix bug with host reschedule for multiple runlists which needs to
write both runlist registers.

Bug 1987640
Bug 1924808
Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549050
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 include/uapi/linux/nvgpu.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index d97f8fb6..b14610bd 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
 /* IO coherence support is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE		(1ULL << 20)
-/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
+/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST	(1ULL << 21)
 /*  subcontexts are available */
 #define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS         (1ULL << 22)
@@ -1477,8 +1477,6 @@ struct nvgpu_fence {
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI	(1 << 4)
 /* skip buffer refcounting during submit */
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING	(1 << 5)
-/* expire current timeslice and reschedule runlist from front */
-#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST	(1 << 6)
 
 struct nvgpu_submit_gpfifo_args {
 	__u64 gpfifo;
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args {
 	__u32 syncpoint_max;	/* out */
 };
 
+struct nvgpu_reschedule_runlist_args {
+#define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT           (1 << 0)
+	__u32 flags;
+};
+
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD	\
 	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT	\
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args {
 	_IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
 #define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \
 	_IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
+#define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST	\
+	_IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args)
 
 #define NVGPU_IOCTL_CHANNEL_LAST	\
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST)
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)
 
 /*
-- 
cgit v1.2.2