author    Konsta Holtta <kholtta@nvidia.com>    2017-05-16 06:47:58 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>    2017-06-14 19:33:32 -0400
commit    7680fd689ecf7d11bf2dfdba41dc2f33cde2bbe7 (patch)
tree      b6df6640eaaa7e46deae7035572e7019f4311f7d /drivers/gpu/nvgpu
parent    3c3c39dfe0d1122efeead871eec7c37617404850 (diff)
gpu: nvgpu: hold power ref for deterministic channels
To support deterministic channels even on platforms where railgating is
supported, have each deterministic-marked channel hold a power reference
for its lifetime, and skip taking per-job power refs in the submit path
for those channels.

Previously, railgating blocked deterministic submits in general: the
gk20a_busy()/gk20a_idle() calls in the submit path can take time and,
more significantly, the GPU may need to be turned on, which takes a long
and nondeterministic amount of time.

As an exception, gk20a_do_idle() can still block deterministic submits
until gk20a_do_unidle() is called. Add a rwsem to guard this. VPR resize
needs do_idle, which conflicts with deterministic channels' requirement
to keep the GPU on; this is now documented in the ioctl header.

Make NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING always
set in the gpu characteristics now that it is supported. The only thing
still blocking NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is the
sync framework.

Make the channel debug dump show which channels are deterministic.

Bug 200291300
Jira NVGPU-70

Change-Id: I47b6f3a8517cd6e4255f6ca2855e3dd912e4f5f3
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1483038
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
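The locking protocol this patch introduces can be summarized as follows.
This is a simplified sketch distilled from the diff below, not a verbatim
excerpt; error handling and channel reference counting are omitted:

    /* Channel open with NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC:
     * take one power ref for the channel's whole lifetime. */
    down_read(&g->deterministic_busy);
    err = gk20a_busy(g);
    if (!err)
            c->deterministic = true;
    up_read(&g->deterministic_busy);

    /* Deterministic submit: no gk20a_busy()/gk20a_idle(); only a read
     * lock that keeps gk20a_do_idle() out while hardware is touched. */
    down_read(&g->deterministic_busy);
    /* ... copy gpfifo entries, update gp_put ... */
    up_read(&g->deterministic_busy);

    /* gk20a_do_idle(): the write lock blocks all of the above, and the
     * per-channel lifetime refs are dropped until gk20a_do_unidle(). */
    down_write(&g->deterministic_busy);

A deterministic submit therefore never waits for the GPU to power on; the
only thing that can block it is an explicit do_idle/do_unidle window.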
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/driver_common.c |   1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module.c        |   9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c        | 169
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h        |   3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c           |   6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h           |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c                |  19
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                |   6
8 files changed, 185 insertions, 29 deletions
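For context before the diff: marking a channel deterministic happens at
gpfifo allocation time from userspace. A minimal sketch, assuming the
nvgpu UAPI names of this period (NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX and
struct nvgpu_alloc_gpfifo_ex_args are believed to come from the uapi
header; the surrounding code is illustrative only, not from this patch):

    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>        /* nvgpu UAPI; exact path may vary */

    /* Request a deterministic channel on an already-open channel fd.
     * With this patch, the driver takes a power ref here and holds it
     * until the channel is freed, so the call may fail if the GPU
     * cannot be powered on at this point. */
    static int alloc_deterministic_gpfifo(int ch_fd)
    {
            struct nvgpu_alloc_gpfifo_ex_args args = {0};

            args.num_entries = 128;
            args.flags = NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC;

            return ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX, &args);
    }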
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index bd9a4e77..a00880ed 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -39,6 +39,7 @@ static void nvgpu_init_vars(struct gk20a *g)
         gk20a_init_gr(g);
 
         init_rwsem(&g->busy_lock);
+        init_rwsem(&g->deterministic_busy);
 
         nvgpu_spinlock_init(&g->mc_enable_lock);
 
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 34a0ded6..cbad3993 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -298,6 +298,12 @@ int __gk20a_do_idle(struct device *dev, bool force_reset)
         bool is_railgated;
         int err = 0;
 
+        /*
+         * Hold back deterministic submits and changes to deterministic
+         * channels - this must be outside the power busy locks.
+         */
+        gk20a_channel_deterministic_idle(g);
+
         /* acquire busy lock to block other busy() calls */
         down_write(&g->busy_lock);
 
@@ -403,6 +409,7 @@ fail_drop_usage_count:
 fail_timeout:
         nvgpu_mutex_release(&platform->railgate_lock);
         up_write(&g->busy_lock);
+        gk20a_channel_deterministic_unidle(g);
         return -EBUSY;
 }
 
@@ -456,6 +463,8 @@ int __gk20a_do_unidle(struct device *dev)
         nvgpu_mutex_release(&platform->railgate_lock);
         up_write(&g->busy_lock);
 
+        gk20a_channel_deterministic_unidle(g);
+
         return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 49d83069..90202fd7 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -575,8 +575,15 @@ unbind:
         g->ops.fifo.unbind_channel(ch);
         g->ops.fifo.free_inst(g, ch);
 
+        /* put back the channel-wide submit ref from init */
+        if (ch->deterministic) {
+                down_read(&g->deterministic_busy);
+                ch->deterministic = false;
+                gk20a_idle(g);
+                up_read(&g->deterministic_busy);
+        }
+
         ch->vpr = false;
-        ch->deterministic = false;
         ch->vm = NULL;
 
         WARN_ON(ch->sync);
@@ -1228,22 +1235,42 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
         if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED)
                 c->vpr = true;
 
-        if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC)
+        if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) {
+                down_read(&g->deterministic_busy);
+                /*
+                 * Railgating isn't deterministic; instead of disallowing
+                 * railgating globally, take a power refcount for this
+                 * channel's lifetime. The gk20a_idle() pair for this happens
+                 * when the channel gets freed.
+                 *
+                 * Deterministic flag and this busy must be atomic within the
+                 * busy lock.
+                 */
+                err = gk20a_busy(g);
+                if (err) {
+                        up_read(&g->deterministic_busy);
+                        return err;
+                }
+
                 c->deterministic = true;
+                up_read(&g->deterministic_busy);
+        }
 
         /* an address space needs to have been bound at this point. */
         if (!gk20a_channel_as_bound(c)) {
                 nvgpu_err(g,
                         "not bound to an address space at time of gpfifo"
                         " allocation.");
-                return -EINVAL;
+                err = -EINVAL;
+                goto clean_up_idle;
         }
         ch_vm = c->vm;
 
         if (c->gpfifo.mem.size) {
                 nvgpu_err(g, "channel %d :"
                         "gpfifo already allocated", c->hw_chid);
-                return -EEXIST;
+                err = -EEXIST;
+                goto clean_up_idle;
         }
 
         err = nvgpu_dma_alloc_map_sys(ch_vm,
@@ -1336,6 +1363,13 @@ clean_up_unmap:
         nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem);
 clean_up:
         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
+clean_up_idle:
+        if (c->deterministic) {
+                down_read(&g->deterministic_busy);
+                gk20a_idle(g);
+                c->deterministic = false;
+                up_read(&g->deterministic_busy);
+        }
         nvgpu_err(g, "fail");
         return err;
 }
@@ -2089,7 +2123,13 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
                 channel_gk20a_free_job(c, job);
                 job_finished = 1;
-                gk20a_idle(g);
+
+                /*
+                 * Deterministic channels have a channel-wide power reference;
+                 * for others, there's one per submit.
+                 */
+                if (!c->deterministic)
+                        gk20a_idle(g);
 
                 if (!clean_all) {
                         /* Timeout isn't supported here so don't touch it. */
@@ -2457,7 +2497,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
          * Job tracking is necessary for any of the following conditions:
          * - pre- or post-fence functionality
          * - channel wdt
-         * - GPU rail-gating
+         * - GPU rail-gating with non-deterministic channels
          * - buffer refcounting
          *
          * If none of the conditions are met, then job tracking is not
@@ -2467,7 +2507,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) ||
                         (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) ||
                         c->wdt_enabled ||
-                        g->can_railgate ||
+                        (g->can_railgate && !c->deterministic) ||
                         !skip_buffer_refcounting;
 
         if (need_job_tracking) {
@@ -2495,7 +2535,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
          * behavior of the clean-up operation non-deterministic
          * (should not be performed in the submit path)
          * - channel wdt
-         * - GPU rail-gating
+         * - GPU rail-gating with non-deterministic channels
          * - buffer refcounting
          *
          * If none of the conditions are met, then deferred clean-up
@@ -2505,7 +2545,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         need_deferred_cleanup = !c->deterministic ||
                         need_sync_framework ||
                         c->wdt_enabled ||
-                        g->can_railgate ||
+                        (g->can_railgate &&
+                         !c->deterministic) ||
                         !skip_buffer_refcounting;
 
         /*
@@ -2515,12 +2556,20 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         if (c->deterministic && need_deferred_cleanup)
                 return -EINVAL;
 
-        /* released by job cleanup via syncpt or sema interrupt */
-        err = gk20a_busy(g);
-        if (err) {
-                nvgpu_err(g, "failed to host gk20a to submit gpfifo, process %s",
-                        current->comm);
-                return err;
+        if (!c->deterministic) {
+                /*
+                 * Get a power ref unless this is a deterministic
+                 * channel that holds them during the channel lifetime.
+                 * This one is released by gk20a_channel_clean_up_jobs,
+                 * via syncpt or sema interrupt, whichever is used.
+                 */
+                err = gk20a_busy(g);
+                if (err) {
+                        nvgpu_err(g,
+                                "failed to host gk20a to submit gpfifo, process %s",
+                                current->comm);
+                        return err;
+                }
         }
 
         if (!need_deferred_cleanup) {
@@ -2529,6 +2578,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                 }
         }
 
+
+        /* Grab access to HW to deal with do_idle */
+        if (c->deterministic)
+                down_read(&g->deterministic_busy);
+
         trace_gk20a_channel_submit_gpfifo(g->name,
                         c->hw_chid,
                         num_entries,
@@ -2601,6 +2655,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
         g->ops.fifo.userd_gp_put(g, c);
 
+        /* No hw access beyond this point */
+        if (c->deterministic)
+                up_read(&g->deterministic_busy);
+
         trace_gk20a_channel_submitted_gpfifo(g->name,
                         c->hw_chid,
                         num_entries,
@@ -2622,11 +2680,90 @@ clean_up:
         gk20a_dbg_fn("fail");
         gk20a_fence_put(pre_fence);
         gk20a_fence_put(post_fence);
-        if (need_deferred_cleanup)
+        if (c->deterministic)
+                up_read(&g->deterministic_busy);
+        else if (need_deferred_cleanup)
                 gk20a_idle(g);
+
         return err;
 }
 
+/*
+ * Stop deterministic channel activity for do_idle() when power needs to go off
+ * momentarily but deterministic channels keep power refs for potentially a
+ * long time.
+ *
+ * Takes write access on g->deterministic_busy.
+ *
+ * Must be paired with gk20a_channel_deterministic_unidle().
+ */
+void gk20a_channel_deterministic_idle(struct gk20a *g)
+{
+        struct fifo_gk20a *f = &g->fifo;
+        u32 chid;
+
+        /* Grab exclusive access to the hw to block new submits */
+        down_write(&g->deterministic_busy);
+
+        for (chid = 0; chid < f->num_channels; chid++) {
+                struct channel_gk20a *ch = &f->channel[chid];
+
+                if (!gk20a_channel_get(ch))
+                        continue;
+
+                if (ch->deterministic) {
+                        /*
+                         * Drop the power ref taken when setting deterministic
+                         * flag. deterministic_unidle will put this and the
+                         * channel ref back.
+                         *
+                         * Hold the channel ref: it must not get freed in
+                         * between. A race could otherwise result in lost
+                         * gk20a_busy() via unidle, and in unbalanced
+                         * gk20a_idle() via closing the channel.
+                         */
+                        gk20a_idle(g);
+                } else {
+                        /* Not interesting, carry on. */
+                        gk20a_channel_put(ch);
+                }
+        }
+}
+
+/*
+ * Allow deterministic channel activity again for do_unidle().
+ *
+ * This releases write access on g->deterministic_busy.
+ */
+void gk20a_channel_deterministic_unidle(struct gk20a *g)
+{
+        struct fifo_gk20a *f = &g->fifo;
+        u32 chid;
+
+        for (chid = 0; chid < f->num_channels; chid++) {
+                struct channel_gk20a *ch = &f->channel[chid];
+
+                if (!gk20a_channel_get(ch))
+                        continue;
+
+                /*
+                 * Deterministic state changes inside deterministic_busy lock,
+                 * which we took in deterministic_idle.
+                 */
+                if (ch->deterministic) {
+                        if (gk20a_busy(g))
+                                nvgpu_err(g, "cannot busy() again!");
+                        /* Took this in idle() */
+                        gk20a_channel_put(ch);
+                }
+
+                gk20a_channel_put(ch);
+        }
+
+        /* Release submits, new deterministic channels and frees */
+        up_write(&g->deterministic_busy);
+}
+
 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 {
         struct channel_gk20a *c = g->fifo.channel+chid;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 9872e1b2..ca042883 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -328,6 +328,9 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
 int gk20a_channel_suspend(struct gk20a *g);
 int gk20a_channel_resume(struct gk20a *g);
 
+void gk20a_channel_deterministic_idle(struct gk20a *g);
+void gk20a_channel_deterministic_unidle(struct gk20a *g);
+
 int nvgpu_channel_worker_init(struct gk20a *g);
 void nvgpu_channel_worker_deinit(struct gk20a *g);
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 5a571dc8..37e19ef8 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3494,10 +3494,11 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
         syncpointa = inst_mem[ram_fc_syncpointa_w()];
         syncpointb = inst_mem[ram_fc_syncpointb_w()];
 
-        gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid,
+        gk20a_debug_output(o, "%d-%s, pid %d, refs %d%s: ", hw_chid,
                         g->name,
                         ch_state->pid,
-                        ch_state->refs);
+                        ch_state->refs,
+                        ch_state->deterministic ? ", deterministic" : "");
         gk20a_debug_output(o, "channel status: %s in use %s %s\n",
                 ccsr_channel_enable_v(channel) ? "" : "not",
                 gk20a_decode_ccsr_chan_status(status),
@@ -3576,6 +3577,7 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
 
                 ch_state[chid]->pid = ch->pid;
                 ch_state[chid]->refs = atomic_read(&ch->ref_count);
+                ch_state[chid]->deterministic = ch->deterministic;
                 nvgpu_mem_rd_n(g, &ch->inst_block, 0,
                                 &ch_state[chid]->inst_block[0],
                                 ram_in_alloc_size_v());
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 228e5130..1566302f 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -216,6 +216,7 @@ static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
 struct ch_state {
         int pid;
         int refs;
+        bool deterministic;
         u32 inst_block[0];
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 8624d601..13635706 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -456,22 +456,19 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
                 gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
 
         /*
-         * Railgating needs job tracking which prevents fast submits. They're
-         * supported otherwise, provided that the user doesn't request anything
-         * that depends on job tracking. (Here, fast means strictly no
+         * Fast submits are supported as long as the user doesn't request
+         * anything that depends on job tracking. (Here, fast means strictly no
          * metadata, just the gpfifo contents are copied and gp_put updated).
          */
-        if (!g->can_railgate)
-                gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
+        gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
 
         /*
-         * Railgating and sync framework require deferred job cleanup which
-         * prevents deterministic submits. They're supported otherwise,
-         * provided that the user doesn't request anything that depends on
-         * deferred cleanup.
+         * Sync framework requires deferred job cleanup, wrapping syncs in FDs,
+         * and other heavy stuff, which prevents deterministic submits. This is
+         * supported otherwise, provided that the user doesn't request anything
+         * that depends on deferred cleanup.
          */
-        if (!g->can_railgate
-                && !gk20a_channel_sync_needs_sync_framework(g))
+        if (!gk20a_channel_sync_needs_sync_framework(g))
                 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL;
 
         gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 1d867912..79118fca 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1025,6 +1025,12 @@ struct gk20a {
         u32 log_trace;
 
         struct rw_semaphore busy_lock;
+        /*
+         * Guards access to hardware when usual gk20a_{busy,idle} are skipped
+         * for submits and held for channel lifetime but dropped for an ongoing
+         * gk20a_do_idle().
+         */
+        struct rw_semaphore deterministic_busy;
 
         struct nvgpu_falcon pmu_flcn;
         struct nvgpu_falcon sec2_flcn;
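
Finally, a sketch of how the new rwsem behaves from the perspective of a
do_idle() user such as VPR resize. The caller shown here is illustrative;
only __gk20a_do_idle()/__gk20a_do_unidle() and their behavior are from
this patch:

    /* While idle is held, deterministic submits, new deterministic
     * channels and channel frees are all blocked, and the channels'
     * lifetime power refs have been dropped so railgating can happen. */
    if (__gk20a_do_idle(dev, false) == 0) {
            /* ... railgate, resize VPR, etc. ... */
            __gk20a_do_unidle(dev); /* re-takes refs, releases the rwsem */
    }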