path: root/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
author	David Li <davli@nvidia.com>	2018-04-26 05:00:01 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-05-18 02:34:20 -0400
commit	a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch)
tree	4efc94d09217bd5e7fdad973b8dacfdee9bab8dd /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
parent	8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff)
gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST
Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the runlist, and optionally check host and FECS status to preempt a pending load of a context that does not belong to the calling channel on the GR engine during a context switch. This should be called immediately after a submit to decrease the worst-case submit-to-start latency for a high-interleave channel.

There is less than a 0.002% chance that the ioctl blocks for up to a couple of milliseconds, due to a race in which the FECS status changes while it is being read. For GV11B it will always preempt the pending load of an unwanted context, since there is no chance that the ioctl blocks on that race.

Also fix a bug in host reschedule for multiple runlists, which requires writing both runlist registers.

Bug 1987640
Bug 1924808

Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549050
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
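For illustration, here is a minimal userspace sketch of how a client might issue this ioctl right after a submit. The args struct layout, the flag name, and the uapi header path are assumptions for the sketch, not taken from this change; only the ioctl name comes from this commit.

/* Hypothetical usage sketch; names marked below are assumptions. */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header providing the ioctl */

static int reschedule_after_submit(int ch_fd)
{
	struct nvgpu_reschedule_runlist_args args = {	/* assumed struct */
		.flags = NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT,	/* assumed flag */
	};

	/*
	 * Call immediately after the submit ioctl: host expires the current
	 * timeslice and rescans the runlist from the front, and the
	 * preempt-next flag additionally asks the driver to preempt a
	 * pending load of another channel's context on the GR engine.
	 * Returns -1 with errno EBUSY if another reschedule already holds
	 * the runlist lock, in which case a reschedule is underway anyway.
	 */
	return ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST, &args);
}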
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fifo_gk20a.c	110
1 file changed, 89 insertions, 21 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 22dc1d60..c94fc536 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -53,6 +53,7 @@
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
 
 #define FECS_METHOD_WFI_RESTORE	0x80000
+#define FECS_MAILBOX_0_ACK_RESTORE	0x4
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    u32 chid, bool add,
@@ -3282,7 +3283,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	u32 new_buf;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
-	u32 count = 0;
 	u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
 
 	runlist = &f->runlist_info[runlist_id];
@@ -3345,12 +3345,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			ret = -E2BIG;
 			goto clean_up;
 		}
-		count = (runlist_end - runlist_entry_base) / runlist_entry_words;
-		WARN_ON(count > f->num_runlist_entries);
+		runlist->count = (runlist_end - runlist_entry_base) /
+			runlist_entry_words;
+		WARN_ON(runlist->count > f->num_runlist_entries);
 	} else /* suspend to remove all channels */
-		count = 0;
+		runlist->count = 0;
 
-	g->ops.fifo.runlist_hw_submit(g, runlist_id, count, new_buf);
+	g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
 
 	if (wait_for_finish) {
 		ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
@@ -3406,31 +3407,98 @@ end:
 	return ret;
 }
 
+/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
+static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
+		bool wait_preempt)
+{
+	struct gk20a *g = ch->g;
+	struct fifo_runlist_info_gk20a *runlist =
+		&g->fifo.runlist_info[ch->runlist_id];
+	int ret = 0;
+	u32 gr_eng_id = 0;
+	u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
+	s32 preempt_id = -1;
+	u32 preempt_type = 0;
+
+	if (1 != gk20a_fifo_get_engine_ids(
+			g, &gr_eng_id, 1, ENGINE_GR_GK20A))
+		return ret;
+	if (!(runlist->eng_bitmask & (1 << gr_eng_id)))
+		return ret;
+
+	if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
+			fifo_preempt_pending_true_f())
+		return ret;
+
+	fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
+	ctxstat = fifo_engine_status_ctx_status_v(engstat);
+	if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
+		/* host switching to next context, preempt that if needed */
+		preempt_id = fifo_engine_status_next_id_v(engstat);
+		preempt_type = fifo_engine_status_next_id_type_v(engstat);
+	} else
+		return ret;
+	if (preempt_id == ch->tsgid && preempt_type)
+		return ret;
+	fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
+	    fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
+		/* preempt useless if FECS acked save and started restore */
+		return ret;
+	}
+
+	gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
+		fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
+		gk20a_readl(g, fifo_preempt_r()));
+#endif
+	if (wait_preempt) {
+		g->ops.fifo.is_preempt_pending(
+			g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC);
+	}
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempted_next(ch->chid);
+#endif
+	return ret;
+}
+
+int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
+{
+	return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
+}
+
 /* trigger host to expire current timeslice and reschedule runlist from front */
-int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
+int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
+		bool wait_preempt)
 {
+	struct gk20a *g = ch->g;
 	struct fifo_runlist_info_gk20a *runlist;
 	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 	u32 mutex_ret;
 	int ret = 0;
 
-	runlist = &g->fifo.runlist_info[runlist_id];
-	if (nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
-		mutex_ret = nvgpu_pmu_mutex_acquire(
-			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	runlist = &g->fifo.runlist_info[ch->runlist_id];
+	if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock))
+		return -EBUSY;
 
-		gk20a_writel(g, fifo_runlist_r(),
-			gk20a_readl(g, fifo_runlist_r()));
-		gk20a_fifo_runlist_wait_pending(g, runlist_id);
+	mutex_ret = nvgpu_pmu_mutex_acquire(
+		&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+	g->ops.fifo.runlist_hw_submit(
+		g, ch->runlist_id, runlist->count, runlist->cur_buffer);
+
+	if (preempt_next)
+		__locked_fifo_reschedule_preempt_next(ch, wait_preempt);
+
+	gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
+
+	if (!mutex_ret)
+		nvgpu_pmu_mutex_release(
+			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	nvgpu_mutex_release(&runlist->runlist_lock);
 
-		if (!mutex_ret)
-			nvgpu_pmu_mutex_release(
-				&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-		nvgpu_mutex_release(&runlist->runlist_lock);
-	} else {
-		/* someone else is writing fifo_runlist_r so not needed here */
-		ret = -EBUSY;
-	}
 	return ret;
 }
 
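For context on the multi-runlist fix: the reschedule path now goes through g->ops.fifo.runlist_hw_submit with the cached runlist->count and runlist->cur_buffer, rather than a read-modify-write of fifo_runlist_r() alone, because submitting a runlist involves two registers. Below is a rough sketch of what the gk20a hook does; the base-register field helper names are assumptions recalled from the generated hw headers, not part of this diff, and the failure-mode comment is one plausible reading of the bug note in the commit message.

/* Sketch of the gk20a runlist_hw_submit hook; field helpers assumed. */
static void runlist_hw_submit_sketch(struct gk20a *g, u32 runlist_id,
		u32 count, u32 buffer_index)
{
	struct fifo_runlist_info_gk20a *runlist =
		&g->fifo.runlist_info[runlist_id];
	u64 runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]);

	/* register 1: base address of the runlist buffer */
	if (count != 0)
		gk20a_writel(g, fifo_runlist_base_r(),
			fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)));

	/*
	 * register 2: runlist id plus entry count; this write triggers the
	 * fetch. The old reschedule path only re-wrote this register, so
	 * with multiple runlists it could resubmit one runlist against the
	 * base address last programmed for a different one.
	 */
	gk20a_writel(g, fifo_runlist_r(),
		fifo_runlist_engine_f(runlist_id) |
		fifo_eng_runlist_length_f(count));
}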