diff options
author | David Li <davli@nvidia.com> | 2018-04-26 05:00:01 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-05-18 02:34:20 -0400 |
commit | a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch) | |
tree | 4efc94d09217bd5e7fdad973b8dacfdee9bab8dd /include/uapi/linux | |
parent | 8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff) |
gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST
Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the runlist,
and optionally check host and FECS status to preempt a pending load of a
context not belonging to the calling channel on the GR engine during a context
switch.
This should be called immediately after a submit to decrease the worst-case
submit-to-start latency for a high-interleave channel.
There is a less than 0.002% chance that the ioctl blocks for up to a couple of
milliseconds due to a race condition of the FECS status changing while being read.
For GV11B it will always preempt a pending load of an unwanted context, since
there is no chance that the ioctl blocks due to the race condition.
Also fix a bug with host reschedule for multiple runlists, which needs to
write both runlist registers.
Bug 1987640
Bug 1924808
Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549050
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'include/uapi/linux')
-rw-r--r-- | include/uapi/linux/nvgpu.h | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index d97f8fb6..b14610bd 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args { | |||
141 | #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19) | 141 | #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19) |
142 | /* IO coherence support is available */ | 142 | /* IO coherence support is available */ |
143 | #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20) | 143 | #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20) |
144 | /* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */ | 144 | /* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */ |
145 | #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) | 145 | #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) |
146 | /* subcontexts are available */ | 146 | /* subcontexts are available */ |
147 | #define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS (1ULL << 22) | 147 | #define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS (1ULL << 22) |
@@ -1477,8 +1477,6 @@ struct nvgpu_fence { | |||
1477 | #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI (1 << 4) | 1477 | #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI (1 << 4) |
1478 | /* skip buffer refcounting during submit */ | 1478 | /* skip buffer refcounting during submit */ |
1479 | #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5) | 1479 | #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5) |
1480 | /* expire current timeslice and reschedule runlist from front */ | ||
1481 | #define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6) | ||
1482 | 1480 | ||
1483 | struct nvgpu_submit_gpfifo_args { | 1481 | struct nvgpu_submit_gpfifo_args { |
1484 | __u64 gpfifo; | 1482 | __u64 gpfifo; |
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args { | |||
1659 | __u32 syncpoint_max; /* out */ | 1657 | __u32 syncpoint_max; /* out */ |
1660 | }; | 1658 | }; |
1661 | 1659 | ||
1660 | struct nvgpu_reschedule_runlist_args { | ||
1661 | #define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT (1 << 0) | ||
1662 | __u32 flags; | ||
1663 | }; | ||
1664 | |||
1662 | #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \ | 1665 | #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \ |
1663 | _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args) | 1666 | _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args) |
1664 | #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \ | 1667 | #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \ |
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args { | |||
1711 | _IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args) | 1714 | _IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args) |
1712 | #define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \ | 1715 | #define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \ |
1713 | _IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args) | 1716 | _IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args) |
1717 | #define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST \ | ||
1718 | _IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args) | ||
1714 | 1719 | ||
1715 | #define NVGPU_IOCTL_CHANNEL_LAST \ | 1720 | #define NVGPU_IOCTL_CHANNEL_LAST \ |
1716 | _IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT) | 1721 | _IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST) |
1717 | #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args) | 1722 | #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args) |
1718 | 1723 | ||
1719 | /* | 1724 | /* |