diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2017-05-16 06:47:58 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-06-14 19:33:32 -0400 |
commit | 7680fd689ecf7d11bf2dfdba41dc2f33cde2bbe7 (patch) | |
tree | b6df6640eaaa7e46deae7035572e7019f4311f7d /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |
parent | 3c3c39dfe0d1122efeead871eec7c37617404850 (diff) |
gpu: nvgpu: hold power ref for deterministic channels
To support deterministic channels even on platforms where railgating
is supported, have each deterministic-marked channel hold a power
reference during its lifetime, and skip taking power refs for jobs in
the submit path for such channels.
Previously, railgating blocked deterministic submits in general because
the gk20a_busy()/gk20a_idle() calls in the submit path may take time and,
more significantly, because the GPU may need turning on, which takes a
long and nondeterministic amount of time.
As an exception, gk20a_do_idle() can still block deterministic submits
until gk20a_do_unidle() is called. Add a rwsem to guard this. VPR resize
needs do_idle, which conflicts with deterministic channels' requirement
to keep the GPU on. This is documented in the ioctl header now.
Make NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING always
set in the gpu characteristics now that it's supported. The only thing
left now blocking NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is
the sync framework.
Make the channel debug dump show which channels are deterministic.
Bug 200291300
Jira NVGPU-70
Change-Id: I47b6f3a8517cd6e4255f6ca2855e3dd912e4f5f3
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1483038
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 5a571dc8..37e19ef8 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -3494,10 +3494,11 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, | |||
3494 | syncpointa = inst_mem[ram_fc_syncpointa_w()]; | 3494 | syncpointa = inst_mem[ram_fc_syncpointa_w()]; |
3495 | syncpointb = inst_mem[ram_fc_syncpointb_w()]; | 3495 | syncpointb = inst_mem[ram_fc_syncpointb_w()]; |
3496 | 3496 | ||
3497 | gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, | 3497 | gk20a_debug_output(o, "%d-%s, pid %d, refs %d%s: ", hw_chid, |
3498 | g->name, | 3498 | g->name, |
3499 | ch_state->pid, | 3499 | ch_state->pid, |
3500 | ch_state->refs); | 3500 | ch_state->refs, |
3501 | ch_state->deterministic ? ", deterministic" : ""); | ||
3501 | gk20a_debug_output(o, "channel status: %s in use %s %s\n", | 3502 | gk20a_debug_output(o, "channel status: %s in use %s %s\n", |
3502 | ccsr_channel_enable_v(channel) ? "" : "not", | 3503 | ccsr_channel_enable_v(channel) ? "" : "not", |
3503 | gk20a_decode_ccsr_chan_status(status), | 3504 | gk20a_decode_ccsr_chan_status(status), |
@@ -3576,6 +3577,7 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, | |||
3576 | 3577 | ||
3577 | ch_state[chid]->pid = ch->pid; | 3578 | ch_state[chid]->pid = ch->pid; |
3578 | ch_state[chid]->refs = atomic_read(&ch->ref_count); | 3579 | ch_state[chid]->refs = atomic_read(&ch->ref_count); |
3580 | ch_state[chid]->deterministic = ch->deterministic; | ||
3579 | nvgpu_mem_rd_n(g, &ch->inst_block, 0, | 3581 | nvgpu_mem_rd_n(g, &ch->inst_block, 0, |
3580 | &ch_state[chid]->inst_block[0], | 3582 | &ch_state[chid]->inst_block[0], |
3581 | ram_in_alloc_size_v()); | 3583 | ram_in_alloc_size_v()); |