diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2017-05-16 06:47:58 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-06-14 19:33:32 -0400 |
commit | 7680fd689ecf7d11bf2dfdba41dc2f33cde2bbe7 (patch) | |
tree | b6df6640eaaa7e46deae7035572e7019f4311f7d /drivers/gpu/nvgpu/gk20a | |
parent | 3c3c39dfe0d1122efeead871eec7c37617404850 (diff) |
gpu: nvgpu: hold power ref for deterministic channels
To support deterministic channels even with platforms where railgating
is supported, have each deterministic-marked channel hold a power
reference during its lifetime, and skip taking power refs for jobs in
submit path for those.
Previously, railgating blocked deterministic submits in general because
of gk20a_busy()/gk20a_idle() calls in submit path possibly taking time
and more significantly because the gpu may need turning on which takes a
nondeterministic and long amount of time.
As an exception, gk20a_do_idle() can still block deterministic submits
until gk20a_do_unidle() is called. Add a rwsem to guard this. VPR resize
needs do_idle, which conflicts with deterministic channels' requirement
to keep the GPU on. This is documented in the ioctl header now.
Make NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING always
set in the gpu characteristics now that it's supported. The only thing
left now blocking NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is
the sync framework.
Make the channel debug dump show which channels are deterministic.
Bug 200291300
Jira NVGPU-70
Change-Id: I47b6f3a8517cd6e4255f6ca2855e3dd912e4f5f3
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1483038
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 169 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 6 |
6 files changed, 175 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 49d83069..90202fd7 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -575,8 +575,15 @@ unbind: | |||
575 | g->ops.fifo.unbind_channel(ch); | 575 | g->ops.fifo.unbind_channel(ch); |
576 | g->ops.fifo.free_inst(g, ch); | 576 | g->ops.fifo.free_inst(g, ch); |
577 | 577 | ||
578 | /* put back the channel-wide submit ref from init */ | ||
579 | if (ch->deterministic) { | ||
580 | down_read(&g->deterministic_busy); | ||
581 | ch->deterministic = false; | ||
582 | gk20a_idle(g); | ||
583 | up_read(&g->deterministic_busy); | ||
584 | } | ||
585 | |||
578 | ch->vpr = false; | 586 | ch->vpr = false; |
579 | ch->deterministic = false; | ||
580 | ch->vm = NULL; | 587 | ch->vm = NULL; |
581 | 588 | ||
582 | WARN_ON(ch->sync); | 589 | WARN_ON(ch->sync); |
@@ -1228,22 +1235,42 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c, | |||
1228 | if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED) | 1235 | if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED) |
1229 | c->vpr = true; | 1236 | c->vpr = true; |
1230 | 1237 | ||
1231 | if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) | 1238 | if (flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) { |
1239 | down_read(&g->deterministic_busy); | ||
1240 | /* | ||
1241 | * Railgating isn't deterministic; instead of disallowing | ||
1242 | * railgating globally, take a power refcount for this | ||
1243 | * channel's lifetime. The gk20a_idle() pair for this happens | ||
1244 | * when the channel gets freed. | ||
1245 | * | ||
1246 | * Deterministic flag and this busy must be atomic within the | ||
1247 | * busy lock. | ||
1248 | */ | ||
1249 | err = gk20a_busy(g); | ||
1250 | if (err) { | ||
1251 | up_read(&g->deterministic_busy); | ||
1252 | return err; | ||
1253 | } | ||
1254 | |||
1232 | c->deterministic = true; | 1255 | c->deterministic = true; |
1256 | up_read(&g->deterministic_busy); | ||
1257 | } | ||
1233 | 1258 | ||
1234 | /* an address space needs to have been bound at this point. */ | 1259 | /* an address space needs to have been bound at this point. */ |
1235 | if (!gk20a_channel_as_bound(c)) { | 1260 | if (!gk20a_channel_as_bound(c)) { |
1236 | nvgpu_err(g, | 1261 | nvgpu_err(g, |
1237 | "not bound to an address space at time of gpfifo" | 1262 | "not bound to an address space at time of gpfifo" |
1238 | " allocation."); | 1263 | " allocation."); |
1239 | return -EINVAL; | 1264 | err = -EINVAL; |
1265 | goto clean_up_idle; | ||
1240 | } | 1266 | } |
1241 | ch_vm = c->vm; | 1267 | ch_vm = c->vm; |
1242 | 1268 | ||
1243 | if (c->gpfifo.mem.size) { | 1269 | if (c->gpfifo.mem.size) { |
1244 | nvgpu_err(g, "channel %d :" | 1270 | nvgpu_err(g, "channel %d :" |
1245 | "gpfifo already allocated", c->hw_chid); | 1271 | "gpfifo already allocated", c->hw_chid); |
1246 | return -EEXIST; | 1272 | err = -EEXIST; |
1273 | goto clean_up_idle; | ||
1247 | } | 1274 | } |
1248 | 1275 | ||
1249 | err = nvgpu_dma_alloc_map_sys(ch_vm, | 1276 | err = nvgpu_dma_alloc_map_sys(ch_vm, |
@@ -1336,6 +1363,13 @@ clean_up_unmap: | |||
1336 | nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem); | 1363 | nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem); |
1337 | clean_up: | 1364 | clean_up: |
1338 | memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); | 1365 | memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); |
1366 | clean_up_idle: | ||
1367 | if (c->deterministic) { | ||
1368 | down_read(&g->deterministic_busy); | ||
1369 | gk20a_idle(g); | ||
1370 | c->deterministic = false; | ||
1371 | up_read(&g->deterministic_busy); | ||
1372 | } | ||
1339 | nvgpu_err(g, "fail"); | 1373 | nvgpu_err(g, "fail"); |
1340 | return err; | 1374 | return err; |
1341 | } | 1375 | } |
@@ -2089,7 +2123,13 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2089 | 2123 | ||
2090 | channel_gk20a_free_job(c, job); | 2124 | channel_gk20a_free_job(c, job); |
2091 | job_finished = 1; | 2125 | job_finished = 1; |
2092 | gk20a_idle(g); | 2126 | |
2127 | /* | ||
2128 | * Deterministic channels have a channel-wide power reference; | ||
2129 | * for others, there's one per submit. | ||
2130 | */ | ||
2131 | if (!c->deterministic) | ||
2132 | gk20a_idle(g); | ||
2093 | 2133 | ||
2094 | if (!clean_all) { | 2134 | if (!clean_all) { |
2095 | /* Timeout isn't supported here so don't touch it. */ | 2135 | /* Timeout isn't supported here so don't touch it. */ |
@@ -2457,7 +2497,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2457 | * Job tracking is necessary for any of the following conditions: | 2497 | * Job tracking is necessary for any of the following conditions: |
2458 | * - pre- or post-fence functionality | 2498 | * - pre- or post-fence functionality |
2459 | * - channel wdt | 2499 | * - channel wdt |
2460 | * - GPU rail-gating | 2500 | * - GPU rail-gating with non-deterministic channels |
2461 | * - buffer refcounting | 2501 | * - buffer refcounting |
2462 | * | 2502 | * |
2463 | * If none of the conditions are met, then job tracking is not | 2503 | * If none of the conditions are met, then job tracking is not |
@@ -2467,7 +2507,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2467 | need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) || | 2507 | need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) || |
2468 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) || | 2508 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) || |
2469 | c->wdt_enabled || | 2509 | c->wdt_enabled || |
2470 | g->can_railgate || | 2510 | (g->can_railgate && !c->deterministic) || |
2471 | !skip_buffer_refcounting; | 2511 | !skip_buffer_refcounting; |
2472 | 2512 | ||
2473 | if (need_job_tracking) { | 2513 | if (need_job_tracking) { |
@@ -2495,7 +2535,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2495 | * behavior of the clean-up operation non-deterministic | 2535 | * behavior of the clean-up operation non-deterministic |
2496 | * (should not be performed in the submit path) | 2536 | * (should not be performed in the submit path) |
2497 | * - channel wdt | 2537 | * - channel wdt |
2498 | * - GPU rail-gating | 2538 | * - GPU rail-gating with non-deterministic channels |
2499 | * - buffer refcounting | 2539 | * - buffer refcounting |
2500 | * | 2540 | * |
2501 | * If none of the conditions are met, then deferred clean-up | 2541 | * If none of the conditions are met, then deferred clean-up |
@@ -2505,7 +2545,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2505 | need_deferred_cleanup = !c->deterministic || | 2545 | need_deferred_cleanup = !c->deterministic || |
2506 | need_sync_framework || | 2546 | need_sync_framework || |
2507 | c->wdt_enabled || | 2547 | c->wdt_enabled || |
2508 | g->can_railgate || | 2548 | (g->can_railgate && |
2549 | !c->deterministic) || | ||
2509 | !skip_buffer_refcounting; | 2550 | !skip_buffer_refcounting; |
2510 | 2551 | ||
2511 | /* | 2552 | /* |
@@ -2515,12 +2556,20 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2515 | if (c->deterministic && need_deferred_cleanup) | 2556 | if (c->deterministic && need_deferred_cleanup) |
2516 | return -EINVAL; | 2557 | return -EINVAL; |
2517 | 2558 | ||
2518 | /* released by job cleanup via syncpt or sema interrupt */ | 2559 | if (!c->deterministic) { |
2519 | err = gk20a_busy(g); | 2560 | /* |
2520 | if (err) { | 2561 | * Get a power ref unless this is a deterministic |
2521 | nvgpu_err(g, "failed to host gk20a to submit gpfifo, process %s", | 2562 | * channel that holds them during the channel lifetime. |
2522 | current->comm); | 2563 | * This one is released by gk20a_channel_clean_up_jobs, |
2523 | return err; | 2564 | * via syncpt or sema interrupt, whichever is used. |
2565 | */ | ||
2566 | err = gk20a_busy(g); | ||
2567 | if (err) { | ||
2568 | nvgpu_err(g, | ||
2569 | "failed to host gk20a to submit gpfifo, process %s", | ||
2570 | current->comm); | ||
2571 | return err; | ||
2572 | } | ||
2524 | } | 2573 | } |
2525 | 2574 | ||
2526 | if (!need_deferred_cleanup) { | 2575 | if (!need_deferred_cleanup) { |
@@ -2529,6 +2578,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2529 | } | 2578 | } |
2530 | } | 2579 | } |
2531 | 2580 | ||
2581 | |||
2582 | /* Grab access to HW to deal with do_idle */ | ||
2583 | if (c->deterministic) | ||
2584 | down_read(&g->deterministic_busy); | ||
2585 | |||
2532 | trace_gk20a_channel_submit_gpfifo(g->name, | 2586 | trace_gk20a_channel_submit_gpfifo(g->name, |
2533 | c->hw_chid, | 2587 | c->hw_chid, |
2534 | num_entries, | 2588 | num_entries, |
@@ -2601,6 +2655,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2601 | 2655 | ||
2602 | g->ops.fifo.userd_gp_put(g, c); | 2656 | g->ops.fifo.userd_gp_put(g, c); |
2603 | 2657 | ||
2658 | /* No hw access beyond this point */ | ||
2659 | if (c->deterministic) | ||
2660 | up_read(&g->deterministic_busy); | ||
2661 | |||
2604 | trace_gk20a_channel_submitted_gpfifo(g->name, | 2662 | trace_gk20a_channel_submitted_gpfifo(g->name, |
2605 | c->hw_chid, | 2663 | c->hw_chid, |
2606 | num_entries, | 2664 | num_entries, |
@@ -2622,11 +2680,90 @@ clean_up: | |||
2622 | gk20a_dbg_fn("fail"); | 2680 | gk20a_dbg_fn("fail"); |
2623 | gk20a_fence_put(pre_fence); | 2681 | gk20a_fence_put(pre_fence); |
2624 | gk20a_fence_put(post_fence); | 2682 | gk20a_fence_put(post_fence); |
2625 | if (need_deferred_cleanup) | 2683 | if (c->deterministic) |
2684 | up_read(&g->deterministic_busy); | ||
2685 | else if (need_deferred_cleanup) | ||
2626 | gk20a_idle(g); | 2686 | gk20a_idle(g); |
2687 | |||
2627 | return err; | 2688 | return err; |
2628 | } | 2689 | } |
2629 | 2690 | ||
2691 | /* | ||
2692 | * Stop deterministic channel activity for do_idle() when power needs to go off | ||
2693 | * momentarily but deterministic channels keep power refs for potentially a | ||
2694 | * long time. | ||
2695 | * | ||
2696 | * Takes write access on g->deterministic_busy. | ||
2697 | * | ||
2698 | * Must be paired with gk20a_channel_deterministic_unidle(). | ||
2699 | */ | ||
2700 | void gk20a_channel_deterministic_idle(struct gk20a *g) | ||
2701 | { | ||
2702 | struct fifo_gk20a *f = &g->fifo; | ||
2703 | u32 chid; | ||
2704 | |||
2705 | /* Grab exclusive access to the hw to block new submits */ | ||
2706 | down_write(&g->deterministic_busy); | ||
2707 | |||
2708 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2709 | struct channel_gk20a *ch = &f->channel[chid]; | ||
2710 | |||
2711 | if (!gk20a_channel_get(ch)) | ||
2712 | continue; | ||
2713 | |||
2714 | if (ch->deterministic) { | ||
2715 | /* | ||
2716 | * Drop the power ref taken when setting deterministic | ||
2717 | * flag. deterministic_unidle will put this and the | ||
2718 | * channel ref back. | ||
2719 | * | ||
2720 | * Hold the channel ref: it must not get freed in | ||
2721 | * between. A race could otherwise result in lost | ||
2722 | * gk20a_busy() via unidle, and in unbalanced | ||
2723 | * gk20a_idle() via closing the channel. | ||
2724 | */ | ||
2725 | gk20a_idle(g); | ||
2726 | } else { | ||
2727 | /* Not interesting, carry on. */ | ||
2728 | gk20a_channel_put(ch); | ||
2729 | } | ||
2730 | } | ||
2731 | } | ||
2732 | |||
2733 | /* | ||
2734 | * Allow deterministic channel activity again for do_unidle(). | ||
2735 | * | ||
2736 | * This releases write access on g->deterministic_busy. | ||
2737 | */ | ||
2738 | void gk20a_channel_deterministic_unidle(struct gk20a *g) | ||
2739 | { | ||
2740 | struct fifo_gk20a *f = &g->fifo; | ||
2741 | u32 chid; | ||
2742 | |||
2743 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2744 | struct channel_gk20a *ch = &f->channel[chid]; | ||
2745 | |||
2746 | if (!gk20a_channel_get(ch)) | ||
2747 | continue; | ||
2748 | |||
2749 | /* | ||
2750 | * Deterministic state changes inside deterministic_busy lock, | ||
2751 | * which we took in deterministic_idle. | ||
2752 | */ | ||
2753 | if (ch->deterministic) { | ||
2754 | if (gk20a_busy(g)) | ||
2755 | nvgpu_err(g, "cannot busy() again!"); | ||
2756 | /* Took this in idle() */ | ||
2757 | gk20a_channel_put(ch); | ||
2758 | } | ||
2759 | |||
2760 | gk20a_channel_put(ch); | ||
2761 | } | ||
2762 | |||
2763 | /* Release submits, new deterministic channels and frees */ | ||
2764 | up_write(&g->deterministic_busy); | ||
2765 | } | ||
2766 | |||
2630 | int gk20a_init_channel_support(struct gk20a *g, u32 chid) | 2767 | int gk20a_init_channel_support(struct gk20a *g, u32 chid) |
2631 | { | 2768 | { |
2632 | struct channel_gk20a *c = g->fifo.channel+chid; | 2769 | struct channel_gk20a *c = g->fifo.channel+chid; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 9872e1b2..ca042883 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -328,6 +328,9 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch); | |||
328 | int gk20a_channel_suspend(struct gk20a *g); | 328 | int gk20a_channel_suspend(struct gk20a *g); |
329 | int gk20a_channel_resume(struct gk20a *g); | 329 | int gk20a_channel_resume(struct gk20a *g); |
330 | 330 | ||
331 | void gk20a_channel_deterministic_idle(struct gk20a *g); | ||
332 | void gk20a_channel_deterministic_unidle(struct gk20a *g); | ||
333 | |||
331 | int nvgpu_channel_worker_init(struct gk20a *g); | 334 | int nvgpu_channel_worker_init(struct gk20a *g); |
332 | void nvgpu_channel_worker_deinit(struct gk20a *g); | 335 | void nvgpu_channel_worker_deinit(struct gk20a *g); |
333 | 336 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 5a571dc8..37e19ef8 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -3494,10 +3494,11 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, | |||
3494 | syncpointa = inst_mem[ram_fc_syncpointa_w()]; | 3494 | syncpointa = inst_mem[ram_fc_syncpointa_w()]; |
3495 | syncpointb = inst_mem[ram_fc_syncpointb_w()]; | 3495 | syncpointb = inst_mem[ram_fc_syncpointb_w()]; |
3496 | 3496 | ||
3497 | gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, | 3497 | gk20a_debug_output(o, "%d-%s, pid %d, refs %d%s: ", hw_chid, |
3498 | g->name, | 3498 | g->name, |
3499 | ch_state->pid, | 3499 | ch_state->pid, |
3500 | ch_state->refs); | 3500 | ch_state->refs, |
3501 | ch_state->deterministic ? ", deterministic" : ""); | ||
3501 | gk20a_debug_output(o, "channel status: %s in use %s %s\n", | 3502 | gk20a_debug_output(o, "channel status: %s in use %s %s\n", |
3502 | ccsr_channel_enable_v(channel) ? "" : "not", | 3503 | ccsr_channel_enable_v(channel) ? "" : "not", |
3503 | gk20a_decode_ccsr_chan_status(status), | 3504 | gk20a_decode_ccsr_chan_status(status), |
@@ -3576,6 +3577,7 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, | |||
3576 | 3577 | ||
3577 | ch_state[chid]->pid = ch->pid; | 3578 | ch_state[chid]->pid = ch->pid; |
3578 | ch_state[chid]->refs = atomic_read(&ch->ref_count); | 3579 | ch_state[chid]->refs = atomic_read(&ch->ref_count); |
3580 | ch_state[chid]->deterministic = ch->deterministic; | ||
3579 | nvgpu_mem_rd_n(g, &ch->inst_block, 0, | 3581 | nvgpu_mem_rd_n(g, &ch->inst_block, 0, |
3580 | &ch_state[chid]->inst_block[0], | 3582 | &ch_state[chid]->inst_block[0], |
3581 | ram_in_alloc_size_v()); | 3583 | ram_in_alloc_size_v()); |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 228e5130..1566302f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -216,6 +216,7 @@ static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level) | |||
216 | struct ch_state { | 216 | struct ch_state { |
217 | int pid; | 217 | int pid; |
218 | int refs; | 218 | int refs; |
219 | bool deterministic; | ||
219 | u32 inst_block[0]; | 220 | u32 inst_block[0]; |
220 | }; | 221 | }; |
221 | 222 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 8624d601..13635706 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -456,22 +456,19 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
456 | gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; | 456 | gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; |
457 | 457 | ||
458 | /* | 458 | /* |
459 | * Railgating needs job tracking which prevents fast submits. They're | 459 | * Fast submits are supported as long as the user doesn't request |
460 | * supported otherwise, provided that the user doesn't request anything | 460 | * anything that depends on job tracking. (Here, fast means strictly no |
461 | * that depends on job tracking. (Here, fast means strictly no | ||
462 | * metadata, just the gpfifo contents are copied and gp_put updated). | 461 | * metadata, just the gpfifo contents are copied and gp_put updated). |
463 | */ | 462 | */ |
464 | if (!g->can_railgate) | 463 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; |
465 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; | ||
466 | 464 | ||
467 | /* | 465 | /* |
468 | * Railgating and sync framework require deferred job cleanup which | 466 | * Sync framework requires deferred job cleanup, wrapping syncs in FDs, |
469 | * prevents deterministic submits. They're supported otherwise, | 467 | * and other heavy stuff, which prevents deterministic submits. This is |
470 | * provided that the user doesn't request anything that depends on | 468 | * supported otherwise, provided that the user doesn't request anything |
471 | * deferred cleanup. | 469 | * that depends on deferred cleanup. |
472 | */ | 470 | */ |
473 | if (!g->can_railgate | 471 | if (!gk20a_channel_sync_needs_sync_framework(g)) |
474 | && !gk20a_channel_sync_needs_sync_framework(g)) | ||
475 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL; | 472 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL; |
476 | 473 | ||
477 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS; | 474 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 1d867912..79118fca 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -1025,6 +1025,12 @@ struct gk20a { | |||
1025 | u32 log_trace; | 1025 | u32 log_trace; |
1026 | 1026 | ||
1027 | struct rw_semaphore busy_lock; | 1027 | struct rw_semaphore busy_lock; |
1028 | /* | ||
1029 | * Guards access to hardware when usual gk20a_{busy,idle} are skipped | ||
1030 | * for submits and held for channel lifetime but dropped for an ongoing | ||
1031 | * gk20a_do_idle(). | ||
1032 | */ | ||
1033 | struct rw_semaphore deterministic_busy; | ||
1028 | 1034 | ||
1029 | struct nvgpu_falcon pmu_flcn; | 1035 | struct nvgpu_falcon pmu_flcn; |
1030 | struct nvgpu_falcon sec2_flcn; | 1036 | struct nvgpu_falcon sec2_flcn; |