gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST

Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the runlist, and optionally check host and FECS status to preempt a pending load of a context not belonging to the calling channel on the GR engine during a context switch. This should be called immediately after a submit to decrease the worst-case submit-to-start latency for a high-interleave channel. There is a less than 0.002% chance that the ioctl blocks for up to a couple of milliseconds due to a race condition of the FECS status changing while being read. For GV11B it will always preempt a pending load of an unwanted context, since there is no chance that the ioctl blocks due to the race condition. Also fix a bug with host reschedule for multiple runlists, which needs to write both runlist registers. Bug 1987640 Bug 1924808 Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848 Signed-off-by: David Li <davli@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1549050 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: David Li <davli@nvidia.com> 2018-04-26 05:00:01 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-05-18 02:34:20 -0400
commit: a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch)
tree: 4efc94d09217bd5e7fdad973b8dacfdee9bab8dd /include
parent: 8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff)
2 files changed, 55 insertions, 5 deletions
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index 99726e4c..ef51451a 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -1,7 +1,7 @@
 /*
 * gk20a event logging to ftrace.
 *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -346,6 +346,51 @@ TRACE_EVENT(gk20a_channel_submitted_gpfifo,
                __entry->flags, __entry->incr_id, __entry->incr_value)
 );
+TRACE_EVENT(gk20a_reschedule_preempt_next,
+                TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2,
+                        u32 preempt),
+                TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt),
+        TP_STRUCT__entry(
+                __field(u32, chid)
+                __field(u32, fecs0)
+                __field(u32, engstat)
+                __field(u32, fecs1)
+                __field(u32, fecs2)
+                __field(u32, preempt)
+        ),
+        TP_fast_assign(
+                __entry->chid = chid;
+                __entry->fecs0 = fecs0;
+                __entry->engstat = engstat;
+                __entry->fecs1 = fecs1;
+                __entry->fecs2 = fecs2;
+                __entry->preempt = preempt;
+        ),
+        TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x,"
+                " preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat,
+                __entry->fecs1, __entry->fecs2, __entry->preempt)
+);
+TRACE_EVENT(gk20a_reschedule_preempted_next,
+                TP_PROTO(u32 chid),
+                TP_ARGS(chid),
+        TP_STRUCT__entry(
+                __field(u32, chid)
+        ),
+        TP_fast_assign(
+                __entry->chid = chid;
+        ),
+        TP_printk("chid=%d", __entry->chid)
+);
 TRACE_EVENT(gk20a_channel_reset,
                TP_PROTO(u32 chid, u32 tsgid),
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index d97f8fb6..b14610bd 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
 /* IO coherence support is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE            (1ULL << 20)
-/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
+/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST      (1ULL << 21)
 /*  subcontexts are available */
 #define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS         (1ULL << 22)
@@ -1477,8 +1477,6 @@ struct nvgpu_fence {
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI  (1 << 4)
 /* skip buffer refcounting during submit */
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING       (1 << 5)
-/* expire current timeslice and reschedule runlist from front */
-#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST    (1 << 6)
 struct nvgpu_submit_gpfifo_args {
        __u64 gpfifo;
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args {
        __u32 syncpoint_max;    /* out */
 };
+struct nvgpu_reschedule_runlist_args {
+#define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT           (1 << 0)
+        __u32 flags;
+};
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD        \
        _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args {
        _IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
 #define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \
        _IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
+#define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST  \
+        _IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args)
 #define NVGPU_IOCTL_CHANNEL_LAST        \
-        _IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT)
+        _IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST)
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)
 /*
author	David Li <davli@nvidia.com>	2018-04-26 05:00:01 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-05-18 02:34:20 -0400
commit	a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch)
tree	4efc94d09217bd5e7fdad973b8dacfdee9bab8dd /include
parent	8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff)

diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h index 99726e4c..ef51451a 100644 --- a/include/trace/events/gk20a.h +++ b/include/trace/events/gk20a.h
@@ -1,7 +1,7 @@
1	/*	1	/*
2	* gk20a event logging to ftrace.	2	* gk20a event logging to ftrace.
3	*	3	*
4	* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.	4	* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5	*	5	*
6	* This program is free software; you can redistribute it and/or modify it	6	* This program is free software; you can redistribute it and/or modify it
7	* under the terms and conditions of the GNU General Public License,	7	* under the terms and conditions of the GNU General Public License,
@@ -346,6 +346,51 @@ TRACE_EVENT(gk20a_channel_submitted_gpfifo,
346	__entry->flags, __entry->incr_id, __entry->incr_value)	346	__entry->flags, __entry->incr_id, __entry->incr_value)
347	);	347	);
348		348
		349	TRACE_EVENT(gk20a_reschedule_preempt_next,
		350	TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2,
		351	u32 preempt),
		352
		353	TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt),
		354
		355	TP_STRUCT__entry(
		356	__field(u32, chid)
		357	__field(u32, fecs0)
		358	__field(u32, engstat)
		359	__field(u32, fecs1)
		360	__field(u32, fecs2)
		361	__field(u32, preempt)
		362	),
		363
		364	TP_fast_assign(
		365	__entry->chid = chid;
		366	__entry->fecs0 = fecs0;
		367	__entry->engstat = engstat;
		368	__entry->fecs1 = fecs1;
		369	__entry->fecs2 = fecs2;
		370	__entry->preempt = preempt;
		371	),
		372
		373	TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x,"
		374	" preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat,
		375	__entry->fecs1, __entry->fecs2, __entry->preempt)
		376	);
		377
		378	TRACE_EVENT(gk20a_reschedule_preempted_next,
		379	TP_PROTO(u32 chid),
		380
		381	TP_ARGS(chid),
		382
		383	TP_STRUCT__entry(
		384	__field(u32, chid)
		385	),
		386
		387	TP_fast_assign(
		388	__entry->chid = chid;
		389	),
		390
		391	TP_printk("chid=%d", __entry->chid)
		392	);
		393
349	TRACE_EVENT(gk20a_channel_reset,	394	TRACE_EVENT(gk20a_channel_reset,
350	TP_PROTO(u32 chid, u32 tsgid),	395	TP_PROTO(u32 chid, u32 tsgid),
351		396


diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index d97f8fb6..b14610bd 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h
@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args {
141	#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)	141	#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
142	/* IO coherence support is available */	142	/* IO coherence support is available */
143	#define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20)	143	#define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20)
144	/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */	144	/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
145	#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)	145	#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)
146	/* subcontexts are available */	146	/* subcontexts are available */
147	#define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS (1ULL << 22)	147	#define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS (1ULL << 22)
@@ -1477,8 +1477,6 @@ struct nvgpu_fence {
1477	#define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI (1 << 4)	1477	#define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI (1 << 4)
1478	/* skip buffer refcounting during submit */	1478	/* skip buffer refcounting during submit */
1479	#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5)	1479	#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5)
1480	/* expire current timeslice and reschedule runlist from front */
1481	#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6)
1482		1480
1483	struct nvgpu_submit_gpfifo_args {	1481	struct nvgpu_submit_gpfifo_args {
1484	__u64 gpfifo;	1482	__u64 gpfifo;
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args {
1659	__u32 syncpoint_max; /* out */	1657	__u32 syncpoint_max; /* out */
1660	};	1658	};
1661		1659
		1660	struct nvgpu_reschedule_runlist_args {
		1661	#define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT (1 << 0)
		1662	__u32 flags;
		1663	};
		1664
1662	#define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \	1665	#define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \
1663	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)	1666	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
1664	#define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \	1667	#define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args {
1711	_IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)	1714	_IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
1712	#define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \	1715	#define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \
1713	_IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)	1716	_IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
		1717	#define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST \
		1718	_IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args)
1714		1719
1715	#define NVGPU_IOCTL_CHANNEL_LAST \	1720	#define NVGPU_IOCTL_CHANNEL_LAST \
1716	_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT)	1721	_IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST)
1717	#define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)	1722	#define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)
1718		1723
1719	/*	1724	/*