diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-08-09 20:32:33 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-15 20:49:39 -0400 |
commit | 1e7f229e5d92078f772d4f81893b23504cd847a8 (patch) | |
tree | 2c2921d02f3a9c3ae1ff9bd5df4682493297bd09 /drivers/gpu/nvgpu | |
parent | 32127c65151ffc7288ff8483d5b18be77a662b0b (diff) |
gpu: nvgpu: Use correct aperture for perf inst_block
The perf inst block was being treated as vidmem (LFB - local
framebuffer) always, regardless of the type of nvgpu_mem used
for the instance block. On dGPUs this was fine because we
always allocate instance blocks from vidmem. Inst blocks are
allocated with nvgpu_dma_alloc() which chooses vidmem if
vidmem is present, otherwise falls back to sysmem.
When the above fallback logic was deleted, this caused inst
blocks to always be allocated in sysmem, even for dGPUs. This
isn't a problem in and of itself, but the logic for the perf
instance block bind operation assumed a VIDMEM inst_block.
Thus this patch uses the nvgpu_aperture_mask() function to
correctly program the required aperture target for the perf's
inst block bind operation.
JIRA NVGPU-990
Change-Id: If6f09a743ee2ad47a6dbfa28cb7c61f1461fd8a7
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1796388
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 7 |
2 files changed, 11 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 14ae17ea..4aeeec1c 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | |||
@@ -130,6 +130,7 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch, | |||
130 | struct gk20a_cs_snapshot_client *cs_client) | 130 | struct gk20a_cs_snapshot_client *cs_client) |
131 | { | 131 | { |
132 | struct gk20a *g = ch->g; | 132 | struct gk20a *g = ch->g; |
133 | struct mm_gk20a *mm = &g->mm; | ||
133 | struct gr_gk20a *gr = &g->gr; | 134 | struct gr_gk20a *gr = &g->gr; |
134 | struct gk20a_cs_snapshot *data = gr->cs_data; | 135 | struct gk20a_cs_snapshot *data = gr->cs_data; |
135 | u32 snapshot_size = cs_client->snapshot_size; | 136 | u32 snapshot_size = cs_client->snapshot_size; |
@@ -185,8 +186,11 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch, | |||
185 | * should be written last */ | 186 | * should be written last */ |
186 | gk20a_writel(g, perf_pmasys_mem_block_r(), | 187 | gk20a_writel(g, perf_pmasys_mem_block_r(), |
187 | perf_pmasys_mem_block_base_f(inst_pa_page) | | 188 | perf_pmasys_mem_block_base_f(inst_pa_page) | |
188 | perf_pmasys_mem_block_valid_true_f() | | 189 | nvgpu_aperture_mask(g, &mm->hwpm.inst_block, |
189 | perf_pmasys_mem_block_target_lfb_f()); | 190 | perf_pmasys_mem_block_target_sys_ncoh_f(), |
191 | perf_pmasys_mem_block_target_sys_coh_f(), | ||
192 | perf_pmasys_mem_block_target_lfb_f()) | | ||
193 | perf_pmasys_mem_block_valid_true_f()); | ||
190 | 194 | ||
191 | nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots enabled\n"); | 195 | nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots enabled\n"); |
192 | 196 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 65bf2976..8307081e 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -385,8 +385,11 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) | |||
385 | * should be written last */ | 385 | * should be written last */ |
386 | gk20a_writel(g, perf_pmasys_mem_block_r(), | 386 | gk20a_writel(g, perf_pmasys_mem_block_r(), |
387 | perf_pmasys_mem_block_base_f(inst_pa_page) | | 387 | perf_pmasys_mem_block_base_f(inst_pa_page) | |
388 | perf_pmasys_mem_block_valid_true_f() | | 388 | nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, |
389 | perf_pmasys_mem_block_target_lfb_f()); | 389 | perf_pmasys_mem_block_target_sys_ncoh_f(), |
390 | perf_pmasys_mem_block_target_sys_coh_f(), | ||
391 | perf_pmasys_mem_block_target_lfb_f()) | | ||
392 | perf_pmasys_mem_block_valid_true_f()); | ||
390 | 393 | ||
391 | gk20a_idle(g); | 394 | gk20a_idle(g); |
392 | return 0; | 395 | return 0; |