author	David Li <davli@nvidia.com>	2018-04-26 05:00:01 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-05-18 02:34:20 -0400
commit	a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch)
tree	4efc94d09217bd5e7fdad973b8dacfdee9bab8dd /drivers/gpu/nvgpu/gk20a
parent	8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff)
gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST
Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the runlist, and optionally check host and FECS status to preempt a pending load of a context not belonging to the calling channel on the GR engine during a context switch. This should be called immediately after a submit to decrease the worst-case submit-to-start latency for a high-interleave channel.

There is a less than 0.002% chance that the ioctl blocks for up to a couple of milliseconds, due to a race condition where the FECS status changes while being read. On GV11B the pending load of an unwanted context is always preempted, since there is no chance that the ioctl blocks on this race condition.

Also fix a bug in host reschedule for multiple runlists, which requires writing both runlist registers.

Bug 1987640
Bug 1924808

Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549050
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
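For illustration, a minimal userspace sketch of the intended call pattern. This assumes the nvgpu uapi header exposes the ioctl together with a flags-style args struct and an NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT flag; the uapi side is not part of this diffstat, so those names are assumptions:

	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>	/* assumed uapi header location (L4T) */

	static void submit_then_reschedule(int ch_fd)
	{
		/* assumed args layout: a single flags word */
		struct nvgpu_reschedule_runlist_args args = {
			.flags = NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT,
		};

		/* ... submit gpfifo work on ch_fd first ... */

		/*
		 * Immediately after the submit: expire the current timeslice
		 * so host reschedules the runlist from the front, preempting
		 * a pending load of another channel's context if one is in
		 * flight on the GR engine.
		 */
		if (ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST,
				&args) < 0) {
			/* EBUSY: another thread is already resubmitting this
			 * runlist, so the reschedule is redundant anyway */
		}
	}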
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.h	  1
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fifo_gk20a.c	110
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fifo_gk20a.h	  5
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a.h		  3
4 files changed, 95 insertions, 24 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 5e8cab0d..f95184be 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -54,7 +54,6 @@ struct fifo_profile_gk20a;
 #define NVGPU_SUBMIT_FLAGS_SYNC_FENCE (1 << 3)
 #define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI (1 << 4)
 #define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5)
-#define NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST (1 << 6)
 
 /*
  * The binary format of 'struct nvgpu_channel_fence' introduced here
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 22dc1d60..c94fc536 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -53,6 +53,7 @@
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
 
 #define FECS_METHOD_WFI_RESTORE 0x80000
+#define FECS_MAILBOX_0_ACK_RESTORE 0x4
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    u32 chid, bool add,
@@ -3282,7 +3283,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	u32 new_buf;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
-	u32 count = 0;
 	u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
 
 	runlist = &f->runlist_info[runlist_id];
@@ -3345,12 +3345,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			ret = -E2BIG;
 			goto clean_up;
 		}
-		count = (runlist_end - runlist_entry_base) / runlist_entry_words;
-		WARN_ON(count > f->num_runlist_entries);
+		runlist->count = (runlist_end - runlist_entry_base) /
+			runlist_entry_words;
+		WARN_ON(runlist->count > f->num_runlist_entries);
 	} else /* suspend to remove all channels */
-		count = 0;
+		runlist->count = 0;
 
-	g->ops.fifo.runlist_hw_submit(g, runlist_id, count, new_buf);
+	g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
 
 	if (wait_for_finish) {
 		ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
@@ -3406,31 +3407,98 @@ end:
 	return ret;
 }
 
+/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
+static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
+		bool wait_preempt)
+{
+	struct gk20a *g = ch->g;
+	struct fifo_runlist_info_gk20a *runlist =
+		&g->fifo.runlist_info[ch->runlist_id];
+	int ret = 0;
+	u32 gr_eng_id = 0;
+	u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
+	s32 preempt_id = -1;
+	u32 preempt_type = 0;
+
+	if (1 != gk20a_fifo_get_engine_ids(
+			g, &gr_eng_id, 1, ENGINE_GR_GK20A))
+		return ret;
+	if (!(runlist->eng_bitmask & (1 << gr_eng_id)))
+		return ret;
+
+	if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
+			fifo_preempt_pending_true_f())
+		return ret;
+
+	fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
+	ctxstat = fifo_engine_status_ctx_status_v(engstat);
+	if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
+		/* host switching to next context, preempt that if needed */
+		preempt_id = fifo_engine_status_next_id_v(engstat);
+		preempt_type = fifo_engine_status_next_id_type_v(engstat);
+	} else
+		return ret;
+	if (preempt_id == ch->tsgid && preempt_type)
+		return ret;
+	fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
+			fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
+		/* preempt useless if FECS acked save and started restore */
+		return ret;
+	}
+
+	gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
+		fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
+		gk20a_readl(g, fifo_preempt_r()));
+#endif
+	if (wait_preempt) {
+		g->ops.fifo.is_preempt_pending(
+			g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC);
+	}
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempted_next(ch->chid);
+#endif
+	return ret;
+}
+
+int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
+{
+	return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
+}
+
 /* trigger host to expire current timeslice and reschedule runlist from front */
-int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
+int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
+		bool wait_preempt)
 {
+	struct gk20a *g = ch->g;
 	struct fifo_runlist_info_gk20a *runlist;
 	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 	u32 mutex_ret;
 	int ret = 0;
 
-	runlist = &g->fifo.runlist_info[runlist_id];
-	if (nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
-		mutex_ret = nvgpu_pmu_mutex_acquire(
-			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	runlist = &g->fifo.runlist_info[ch->runlist_id];
+	if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock))
+		return -EBUSY;
 
-		gk20a_writel(g, fifo_runlist_r(),
-			gk20a_readl(g, fifo_runlist_r()));
-		gk20a_fifo_runlist_wait_pending(g, runlist_id);
+	mutex_ret = nvgpu_pmu_mutex_acquire(
+		&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+	g->ops.fifo.runlist_hw_submit(
+		g, ch->runlist_id, runlist->count, runlist->cur_buffer);
+
+	if (preempt_next)
+		__locked_fifo_reschedule_preempt_next(ch, wait_preempt);
+
+	gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
+
+	if (!mutex_ret)
+		nvgpu_pmu_mutex_release(
+			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	nvgpu_mutex_release(&runlist->runlist_lock);
 
-		if (!mutex_ret)
-			nvgpu_pmu_mutex_release(
-				&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-		nvgpu_mutex_release(&runlist->runlist_lock);
-	} else {
-		/* someone else is writing fifo_runlist_r so not needed here */
-		ret = -EBUSY;
-	}
 	return ret;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 5866dd1b..576a4ac8 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -95,6 +95,7 @@ struct fifo_runlist_info_gk20a {
 	u32 pbdma_bitmask;	/* pbdmas supported for this runlist*/
 	u32 eng_bitmask;	/* engines using this runlist */
 	u32 reset_eng_bitmask;	/* engines to be reset during recovery */
+	u32 count;		/* cached runlist_hw_submit parameter */
 	bool stopped;
 	bool support_tsg;
 	/* protect ch/tsg/runlist preempt & runlist update */
@@ -249,7 +250,9 @@ void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg);
 
 u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 chid);
 
-int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id);
+int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
+int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
+		bool wait_preempt);
 
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
 			      bool add, bool wait_for_finish);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 17f662df..45fa58f1 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -566,7 +566,8 @@ struct gpu_ops {
 		int (*tsg_verify_channel_status)(struct channel_gk20a *ch);
 		void (*tsg_verify_status_ctx_reload)(struct channel_gk20a *ch);
 		void (*tsg_verify_status_faulted)(struct channel_gk20a *ch);
-		int (*reschedule_runlist)(struct gk20a *g, u32 runlist_id);
+		int (*reschedule_runlist)(struct channel_gk20a *ch,
+				bool preempt_next);
 		int (*update_runlist)(struct gk20a *g, u32 runlist_id,
 				u32 chid, bool add,
 				bool wait_for_finish);
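
The ioctl entry point that drives this hook lives outside this directory and is not part of this diffstat. A hypothetical dispatch sketch follows; only the g->ops.fifo.reschedule_runlist signature comes from this diff, while the handler and flag names are assumptions:

	/* hypothetical handler; the real one lives under common/linux */
	static int nvgpu_ioctl_channel_reschedule_runlist(
			struct channel_gk20a *ch, u32 flags)
	{
		struct gk20a *g = ch->g;

		if (!g->ops.fifo.reschedule_runlist)
			return -ENOSYS;

		/*
		 * gk20a_fifo_reschedule_runlist() forwards to
		 * nvgpu_fifo_reschedule_runlist(ch, preempt_next, true),
		 * i.e. it waits on the preempt; per the commit message, a
		 * chip without the FECS read race (e.g. GV11B) could pass
		 * wait_preempt = false instead.
		 */
		return g->ops.fifo.reschedule_runlist(ch,
			(flags & NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT) != 0);
	}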