path: root/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
author	David Li <davli@nvidia.com>	2018-04-26 05:00:01 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-05-18 02:34:20 -0400
commit	a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch)
tree	4efc94d09217bd5e7fdad973b8dacfdee9bab8dd /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
parent	8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff)
gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST
Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the runlist, and optionally check host and FECS status to preempt a pending load of a context that does not belong to the calling channel on the GR engine during a context switch. This should be called immediately after a submit to decrease the worst-case submit-to-start latency for a high-interleave channel.

There is less than a 0.002% chance that the ioctl blocks for up to a couple of milliseconds, due to a race in which the FECS status changes while it is being read. For GV11B it will always preempt the pending load of an unwanted context, since there is no chance that the ioctl blocks on that race.

Also fix a bug in host reschedule for multiple runlists, which requires writing both runlist registers.

Bug 1987640
Bug 1924808

Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549050
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
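For illustration, here is a minimal userspace sketch of how a client might issue this ioctl right after a submit. The args struct layout, the flag name, and the uapi header path are assumptions for the sketch, not taken from this change; only the ioctl name comes from this commit.

/* Hypothetical usage sketch; names marked below are assumptions. */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header providing the ioctl */

static int reschedule_after_submit(int ch_fd)
{
	struct nvgpu_reschedule_runlist_args args = {	/* assumed struct */
		.flags = NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT,	/* assumed flag */
	};

	/*
	 * Call immediately after the submit ioctl: host expires the current
	 * timeslice and rescans the runlist from the front, and the
	 * preempt-next flag additionally asks the driver to preempt a
	 * pending load of another channel's context on the GR engine.
	 * Returns -1 with errno EBUSY if another reschedule already holds
	 * the runlist lock, in which case a reschedule is underway anyway.
	 */
	return ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST, &args);
}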
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fifo_gk20a.c	110
1 file changed, 89 insertions, 21 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 22dc1d60..c94fc536 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -53,6 +53,7 @@
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
 
 #define FECS_METHOD_WFI_RESTORE	0x80000
+#define FECS_MAILBOX_0_ACK_RESTORE	0x4
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    u32 chid, bool add,
@@ -3282,7 +3283,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	u32 new_buf;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
-	u32 count = 0;
 	u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
 
 	runlist = &f->runlist_info[runlist_id];
@@ -3345,12 +3345,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			ret = -E2BIG;
 			goto clean_up;
 		}
-		count = (runlist_end - runlist_entry_base) / runlist_entry_words;
-		WARN_ON(count > f->num_runlist_entries);
+		runlist->count = (runlist_end - runlist_entry_base) /
+			runlist_entry_words;
+		WARN_ON(runlist->count > f->num_runlist_entries);
 	} else /* suspend to remove all channels */
-		count = 0;
+		runlist->count = 0;
 
-	g->ops.fifo.runlist_hw_submit(g, runlist_id, count, new_buf);
+	g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
 
 	if (wait_for_finish) {
 		ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
@@ -3406,31 +3407,98 @@ end:
 	return ret;
 }
 
+/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
+static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
+		bool wait_preempt)
+{
+	struct gk20a *g = ch->g;
+	struct fifo_runlist_info_gk20a *runlist =
+		&g->fifo.runlist_info[ch->runlist_id];
+	int ret = 0;
+	u32 gr_eng_id = 0;
+	u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
+	s32 preempt_id = -1;
+	u32 preempt_type = 0;
+
+	if (1 != gk20a_fifo_get_engine_ids(
+			g, &gr_eng_id, 1, ENGINE_GR_GK20A))
+		return ret;
+	if (!(runlist->eng_bitmask & (1 << gr_eng_id)))
+		return ret;
+
+	if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
+			fifo_preempt_pending_true_f())
+		return ret;
+
+	fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
+	ctxstat = fifo_engine_status_ctx_status_v(engstat);
+	if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
+		/* host switching to next context, preempt that if needed */
+		preempt_id = fifo_engine_status_next_id_v(engstat);
+		preempt_type = fifo_engine_status_next_id_type_v(engstat);
+	} else
+		return ret;
+	if (preempt_id == ch->tsgid && preempt_type)
+		return ret;
+	fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
+	    fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
+		/* preempt useless if FECS acked save and started restore */
+		return ret;
+	}
+
+	gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
+		fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
+		gk20a_readl(g, fifo_preempt_r()));
+#endif
+	if (wait_preempt) {
+		g->ops.fifo.is_preempt_pending(
+			g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC);
+	}
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempted_next(ch->chid);
+#endif
+	return ret;
+}
+
+int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
+{
+	return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
+}
+
 /* trigger host to expire current timeslice and reschedule runlist from front */
-int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
+int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
+		bool wait_preempt)
 {
+	struct gk20a *g = ch->g;
 	struct fifo_runlist_info_gk20a *runlist;
 	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 	u32 mutex_ret;
 	int ret = 0;
 
-	runlist = &g->fifo.runlist_info[runlist_id];
-	if (nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
-		mutex_ret = nvgpu_pmu_mutex_acquire(
-			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	runlist = &g->fifo.runlist_info[ch->runlist_id];
+	if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock))
+		return -EBUSY;
 
-		gk20a_writel(g, fifo_runlist_r(),
-			gk20a_readl(g, fifo_runlist_r()));
-		gk20a_fifo_runlist_wait_pending(g, runlist_id);
+	mutex_ret = nvgpu_pmu_mutex_acquire(
+		&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+	g->ops.fifo.runlist_hw_submit(
+		g, ch->runlist_id, runlist->count, runlist->cur_buffer);
+
+	if (preempt_next)
+		__locked_fifo_reschedule_preempt_next(ch, wait_preempt);
+
+	gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
+
+	if (!mutex_ret)
+		nvgpu_pmu_mutex_release(
+			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	nvgpu_mutex_release(&runlist->runlist_lock);
 
-		if (!mutex_ret)
-			nvgpu_pmu_mutex_release(
-				&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-		nvgpu_mutex_release(&runlist->runlist_lock);
-	} else {
-		/* someone else is writing fifo_runlist_r so not needed here */
-		ret = -EBUSY;
-	}
 	return ret;
 }
 
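For context on the multi-runlist fix: the reschedule path now goes through g->ops.fifo.runlist_hw_submit with the cached runlist->count and runlist->cur_buffer, rather than a read-modify-write of fifo_runlist_r() alone, because submitting a runlist involves two registers. Below is a rough sketch of what the gk20a hook does; the base-register field helper names are assumptions recalled from the generated hw headers, not part of this diff, and the failure-mode comment is one plausible reading of the bug note in the commit message.

/* Sketch of the gk20a runlist_hw_submit hook; field helpers assumed. */
static void runlist_hw_submit_sketch(struct gk20a *g, u32 runlist_id,
		u32 count, u32 buffer_index)
{
	struct fifo_runlist_info_gk20a *runlist =
		&g->fifo.runlist_info[runlist_id];
	u64 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]);

	/* register 1: base address of the runlist buffer */
	if (count != 0)
		gk20a_writel(g, fifo_runlist_base_r(),
			fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)));

	/*
	 * register 2: runlist id plus entry count; this write triggers the
	 * fetch. The old reschedule path only re-wrote this register, so
	 * with multiple runlists it could resubmit one runlist against the
	 * base address last programmed for a different one.
	 */
	gk20a_writel(g, fifo_runlist_r(),
		fifo_runlist_engine_f(runlist_id) |
		fifo_eng_runlist_length_f(count));
}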