author     David Li <davli@nvidia.com>  2018-04-26 05:00:01 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-05-18 02:34:20 -0400
commit     a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch)
tree       4efc94d09217bd5e7fdad973b8dacfdee9bab8dd
parent     8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff)
gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST
Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the
runlist and, optionally, check host and FECS status so that a pending
load of a context not belonging to the calling channel can be preempted
on the GR engine during the context switch. Calling this immediately
after a submit decreases the worst-case submit-to-start latency for
high-interleave channels. There is a less than 0.002% chance that the
ioctl blocks for up to a couple of milliseconds, due to a race in which
the FECS status changes while it is being read. On GV11B the pending
load of an unwanted context is always preempted, since there is no
chance of the ioctl blocking on that race.

Also fix a bug in host reschedule for multiple runlists, which needs to
write both runlist registers.

Bug 1987640
Bug 1924808

Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549050
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
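For illustration, a minimal userspace sketch of the intended call
sequence. Only the ioctl, struct, and flag names come from this patch;
the helper, its fd argument, and the header include path are
assumptions, and error handling is trimmed:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>  /* assumed install path of the uapi header */

    /* Hypothetical helper: call right after NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO
     * so host reschedules the runlist from the front; PREEMPT_NEXT also asks
     * it to preempt a pending load of another channel's context on GR. */
    static void submit_and_reschedule(int channel_fd)
    {
            struct nvgpu_reschedule_runlist_args args = {
                    .flags = NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT,
            };

            /* ... NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO for channel_fd here ... */

            /* Fails with EPERM without CAP_SYS_NICE, ENOSYS if the HAL has
             * no reschedule_runlist op, EBUSY if the runlist lock is held. */
            if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST,
                      &args) < 0)
                    perror("NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST");
    }

Since the NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST submit flag is
removed by this patch, the reschedule becomes an explicit, separately
privileged call rather than a per-submit flag.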
-rw-r--r--  drivers/gpu/nvgpu/common/linux/channel.c          7
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_channel.c   25
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h           1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c            110
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h              5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                   3
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fifo_gv11b.c              8
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fifo_gv11b.h              1
-rw-r--r--  drivers/gpu/nvgpu/gv11b/gv11b.c                   1
-rw-r--r--  drivers/gpu/nvgpu/gv11b/hal_gv11b.c               1
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/enabled.h         2
-rw-r--r--  include/trace/events/gk20a.h                     47
-rw-r--r--  include/uapi/linux/nvgpu.h                       13
13 files changed, 182 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index 1e170b30..f189d3ed 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -64,9 +64,6 @@ u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
 	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING)
 		flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING;
 
-	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST)
-		flags |= NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST;
-
 	return flags;
 }
 
@@ -1008,10 +1005,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	g->ops.fifo.userd_gp_put(g, c);
 
-	if ((NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST & flags) &&
-		g->ops.fifo.reschedule_runlist)
-		g->ops.fifo.reschedule_runlist(g, c->runlist_id);
-
 	/* No hw access beyond this point */
 	if (c->deterministic)
 		nvgpu_rwsem_up_read(&g->deterministic_busy);
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 606c5251..c1492cad 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -789,10 +789,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	if (ch->has_timedout)
 		return -ETIMEDOUT;
 
-	if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & args->flags) &&
-		!capable(CAP_SYS_NICE))
-		return -EPERM;
-
 	nvgpu_get_fence_args(&args->fence, &fence);
 	submit_flags =
 		nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
@@ -1291,6 +1287,27 @@ long gk20a_channel_ioctl(struct file *filp,
 		err = gk20a_fifo_preempt(ch->g, ch);
 		gk20a_idle(ch->g);
 		break;
+	case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST:
+		if (!capable(CAP_SYS_NICE)) {
+			err = -EPERM;
+			break;
+		}
+		if (!ch->g->ops.fifo.reschedule_runlist) {
+			err = -ENOSYS;
+			break;
+		}
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = ch->g->ops.fifo.reschedule_runlist(ch,
+			NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT &
+			((struct nvgpu_reschedule_runlist_args *)buf)->flags);
+		gk20a_idle(ch->g);
+		break;
 	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
 		err = gk20a_busy(ch->g);
 		if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 5e8cab0d..f95184be 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -54,7 +54,6 @@ struct fifo_profile_gk20a;
 #define NVGPU_SUBMIT_FLAGS_SYNC_FENCE			(1 << 3)
 #define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI			(1 << 4)
 #define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING	(1 << 5)
-#define NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST		(1 << 6)
 
 /*
  * The binary format of 'struct nvgpu_channel_fence' introduced here
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 22dc1d60..c94fc536 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -53,6 +53,7 @@
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
 
 #define FECS_METHOD_WFI_RESTORE		0x80000
+#define FECS_MAILBOX_0_ACK_RESTORE	0x4
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    u32 chid, bool add,
@@ -3282,7 +3283,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	u32 new_buf;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
-	u32 count = 0;
 	u32 runlist_entry_words = f->runlist_entry_size / sizeof(u32);
 
 	runlist = &f->runlist_info[runlist_id];
@@ -3345,12 +3345,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			ret = -E2BIG;
 			goto clean_up;
 		}
-		count = (runlist_end - runlist_entry_base) / runlist_entry_words;
-		WARN_ON(count > f->num_runlist_entries);
+		runlist->count = (runlist_end - runlist_entry_base) /
+			runlist_entry_words;
+		WARN_ON(runlist->count > f->num_runlist_entries);
 	} else /* suspend to remove all channels */
-		count = 0;
+		runlist->count = 0;
 
-	g->ops.fifo.runlist_hw_submit(g, runlist_id, count, new_buf);
+	g->ops.fifo.runlist_hw_submit(g, runlist_id, runlist->count, new_buf);
 
 	if (wait_for_finish) {
 		ret = g->ops.fifo.runlist_wait_pending(g, runlist_id);
@@ -3406,31 +3407,98 @@ end:
 	return ret;
 }
 
+/* trigger host preempt of GR pending load ctx if that ctx is not for ch */
+static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
+		bool wait_preempt)
+{
+	struct gk20a *g = ch->g;
+	struct fifo_runlist_info_gk20a *runlist =
+		&g->fifo.runlist_info[ch->runlist_id];
+	int ret = 0;
+	u32 gr_eng_id = 0;
+	u32 engstat = 0, ctxstat = 0, fecsstat0 = 0, fecsstat1 = 0;
+	s32 preempt_id = -1;
+	u32 preempt_type = 0;
+
+	if (1 != gk20a_fifo_get_engine_ids(
+		g, &gr_eng_id, 1, ENGINE_GR_GK20A))
+		return ret;
+	if (!(runlist->eng_bitmask & (1 << gr_eng_id)))
+		return ret;
+
+	if (wait_preempt && gk20a_readl(g, fifo_preempt_r()) &
+			fifo_preempt_pending_true_f())
+		return ret;
+
+	fecsstat0 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	engstat = gk20a_readl(g, fifo_engine_status_r(gr_eng_id));
+	ctxstat = fifo_engine_status_ctx_status_v(engstat);
+	if (ctxstat == fifo_engine_status_ctx_status_ctxsw_switch_v()) {
+		/* host switching to next context, preempt that if needed */
+		preempt_id = fifo_engine_status_next_id_v(engstat);
+		preempt_type = fifo_engine_status_next_id_type_v(engstat);
+	} else
+		return ret;
+	if (preempt_id == ch->tsgid && preempt_type)
+		return ret;
+	fecsstat1 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	if (fecsstat0 != FECS_MAILBOX_0_ACK_RESTORE ||
+		fecsstat1 != FECS_MAILBOX_0_ACK_RESTORE) {
+		/* preempt useless if FECS acked save and started restore */
+		return ret;
+	}
+
+	gk20a_fifo_issue_preempt(g, preempt_id, preempt_type);
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempt_next(ch->chid, fecsstat0, engstat,
+		fecsstat1, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)),
+		gk20a_readl(g, fifo_preempt_r()));
+#endif
+	if (wait_preempt) {
+		g->ops.fifo.is_preempt_pending(
+			g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC);
+	}
+#ifdef TRACEPOINTS_ENABLED
+	trace_gk20a_reschedule_preempted_next(ch->chid);
+#endif
+	return ret;
+}
+
+int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
+{
+	return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
+}
+
 /* trigger host to expire current timeslice and reschedule runlist from front */
-int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id)
+int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
+		bool wait_preempt)
 {
+	struct gk20a *g = ch->g;
 	struct fifo_runlist_info_gk20a *runlist;
 	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 	u32 mutex_ret;
 	int ret = 0;
 
-	runlist = &g->fifo.runlist_info[runlist_id];
-	if (nvgpu_mutex_tryacquire(&runlist->runlist_lock)) {
-		mutex_ret = nvgpu_pmu_mutex_acquire(
-			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	runlist = &g->fifo.runlist_info[ch->runlist_id];
+	if (!nvgpu_mutex_tryacquire(&runlist->runlist_lock))
+		return -EBUSY;
 
-		gk20a_writel(g, fifo_runlist_r(),
-			gk20a_readl(g, fifo_runlist_r()));
-		gk20a_fifo_runlist_wait_pending(g, runlist_id);
+	mutex_ret = nvgpu_pmu_mutex_acquire(
+		&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+	g->ops.fifo.runlist_hw_submit(
+		g, ch->runlist_id, runlist->count, runlist->cur_buffer);
+
+	if (preempt_next)
+		__locked_fifo_reschedule_preempt_next(ch, wait_preempt);
+
+	gk20a_fifo_runlist_wait_pending(g, ch->runlist_id);
+
+	if (!mutex_ret)
+		nvgpu_pmu_mutex_release(
+			&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+	nvgpu_mutex_release(&runlist->runlist_lock);
 
-		if (!mutex_ret)
-			nvgpu_pmu_mutex_release(
-				&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-		nvgpu_mutex_release(&runlist->runlist_lock);
-	} else {
-		/* someone else is writing fifo_runlist_r so not needed here */
-		ret = -EBUSY;
-	}
 	return ret;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 5866dd1b..576a4ac8 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -95,6 +95,7 @@ struct fifo_runlist_info_gk20a {
 	u32 pbdma_bitmask;      /* pbdmas supported for this runlist*/
 	u32 eng_bitmask;        /* engines using this runlist */
 	u32 reset_eng_bitmask;  /* engines to be reset during recovery */
+	u32 count;              /* cached runlist_hw_submit parameter */
 	bool stopped;
 	bool support_tsg;
 	/* protect ch/tsg/runlist preempt & runlist update */
@@ -249,7 +250,9 @@ void gk20a_fifo_disable_tsg_sched(struct gk20a *g, struct tsg_gk20a *tsg);
 
 u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 chid);
 
-int gk20a_fifo_reschedule_runlist(struct gk20a *g, u32 runlist_id);
+int gk20a_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
+int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
+		bool wait_preempt);
 
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
 			      bool add, bool wait_for_finish);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 17f662df..45fa58f1 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -566,7 +566,8 @@ struct gpu_ops {
 		int (*tsg_verify_channel_status)(struct channel_gk20a *ch);
 		void (*tsg_verify_status_ctx_reload)(struct channel_gk20a *ch);
 		void (*tsg_verify_status_faulted)(struct channel_gk20a *ch);
-		int (*reschedule_runlist)(struct gk20a *g, u32 runlist_id);
+		int (*reschedule_runlist)(struct channel_gk20a *ch,
+				bool preempt_next);
 		int (*update_runlist)(struct gk20a *g, u32 runlist_id,
 				u32 chid, bool add,
 				bool wait_for_finish);
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 30e03092..f00e806f 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -664,6 +664,13 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
 	return runlists_mask;
 }
 
+int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next)
+{
+	/* gv11b allows multiple outstanding preempts,
+	   so always preempt next for best reschedule effect */
+	return nvgpu_fifo_reschedule_runlist(ch, true, false);
+}
+
 static void gv11b_fifo_issue_runlist_preempt(struct gk20a *g,
 					u32 runlists_mask)
 {
@@ -842,7 +849,6 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
 	return ret;
 }
 
-
 static int gv11b_fifo_preempt_runlists(struct gk20a *g, u32 runlists_mask)
 {
 	int ret = 0;
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
index 3f58f927..1ae3c93e 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
@@ -76,6 +76,7 @@ void gv11b_dump_channel_status_ramfc(struct gk20a *g,
 void gv11b_dump_eng_status(struct gk20a *g,
 		struct gk20a_debug_output *o);
 u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g);
+int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
 int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 		unsigned int id_type, unsigned int timeout_rc_type);
 int gv11b_fifo_preempt_channel(struct gk20a *g, u32 chid);
diff --git a/drivers/gpu/nvgpu/gv11b/gv11b.c b/drivers/gpu/nvgpu/gv11b/gv11b.c
index c1ad7944..09fe9a45 100644
--- a/drivers/gpu/nvgpu/gv11b/gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gv11b.c
@@ -154,6 +154,7 @@ int gv11b_init_gpu_characteristics(struct gk20a *g)
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_SCG, true);
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true);
 
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 290a9452..ff779075 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -503,6 +503,7 @@ static const struct gpu_ops gv11b_ops = {
 		.tsg_verify_channel_status = gk20a_fifo_tsg_unbind_channel_verify_status,
 		.tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
 		.tsg_verify_status_faulted = gv11b_fifo_tsg_verify_status_faulted,
+		.reschedule_runlist = gv11b_fifo_reschedule_runlist,
 		.update_runlist = gk20a_fifo_update_runlist,
 		.trigger_mmu_fault = NULL,
 		.get_mmu_fault_info = NULL,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 24748a19..9ae249a2 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -96,7 +96,7 @@ struct gk20a;
 #define NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING	35
 /* Deterministic submits are supported even with job tracking */
 #define NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL		36
-/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
+/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
 #define NVGPU_SUPPORT_RESCHEDULE_RUNLIST		37
 
 /* NVGPU_GPU_IOCTL_GET_EVENT_FD is available */
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index 99726e4c..ef51451a 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -1,7 +1,7 @@
 /*
  * gk20a event logging to ftrace.
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -346,6 +346,51 @@ TRACE_EVENT(gk20a_channel_submitted_gpfifo,
 		__entry->flags, __entry->incr_id, __entry->incr_value)
 );
 
+TRACE_EVENT(gk20a_reschedule_preempt_next,
+	TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2,
+		u32 preempt),
+
+	TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt),
+
+	TP_STRUCT__entry(
+		__field(u32, chid)
+		__field(u32, fecs0)
+		__field(u32, engstat)
+		__field(u32, fecs1)
+		__field(u32, fecs2)
+		__field(u32, preempt)
+	),
+
+	TP_fast_assign(
+		__entry->chid = chid;
+		__entry->fecs0 = fecs0;
+		__entry->engstat = engstat;
+		__entry->fecs1 = fecs1;
+		__entry->fecs2 = fecs2;
+		__entry->preempt = preempt;
+	),
+
+	TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x,"
+		" preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat,
+		__entry->fecs1, __entry->fecs2, __entry->preempt)
+);
+
+TRACE_EVENT(gk20a_reschedule_preempted_next,
+	TP_PROTO(u32 chid),
+
+	TP_ARGS(chid),
+
+	TP_STRUCT__entry(
+		__field(u32, chid)
+	),
+
+	TP_fast_assign(
+		__entry->chid = chid;
+	),
+
+	TP_printk("chid=%d", __entry->chid)
+);
+
 TRACE_EVENT(gk20a_channel_reset,
 	TP_PROTO(u32 chid, u32 tsgid),
 
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index d97f8fb6..b14610bd 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL	(1ULL << 19)
 /* IO coherence support is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE		(1ULL << 20)
-/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */
+/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
 #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST	(1ULL << 21)
 /* subcontexts are available */
 #define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS		(1ULL << 22)
@@ -1477,8 +1477,6 @@ struct nvgpu_fence {
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI		(1 << 4)
 /* skip buffer refcounting during submit */
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING	(1 << 5)
-/* expire current timeslice and reschedule runlist from front */
-#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST	(1 << 6)
 
 struct nvgpu_submit_gpfifo_args {
 	__u64 gpfifo;
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args {
 	__u32 syncpoint_max; /* out */
 };
 
+struct nvgpu_reschedule_runlist_args {
+#define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT		(1 << 0)
+	__u32 flags;
+};
+
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \
 	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args {
 	_IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
 #define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \
 	_IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
+#define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST \
+	_IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args)
 
 #define NVGPU_IOCTL_CHANNEL_LAST \
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST)
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE	sizeof(struct nvgpu_alloc_gpfifo_ex_args)
 
 /*