summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2018-08-09 20:32:33 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-08-15 20:49:39 -0400
commit1e7f229e5d92078f772d4f81893b23504cd847a8 (patch)
tree2c2921d02f3a9c3ae1ff9bd5df4682493297bd09
parent32127c65151ffc7288ff8483d5b18be77a662b0b (diff)
gpu: nvgpu: Use correct aperture for perf inst_block
The perf inst block was being treated as vidmem (LFB - local framebuffer) always, regardless of the type of nvgpu_mem used for the instance block. On dGPUs this was fine because we always allocate instance blocks from vidmem. Inst blocks are allocated with nvgpu_dma_alloc() which chooses vidmem if vidmem is present, otherwise falls back to sysmem. When the above fallback logic was deleted this caused inst blocks to always be allocated in sysmem, even for dGPUs. This isn't a problem in and of itself but the logic for the perf instance block bind operation assumed a VIDMEM inst_block. Thus this patch uses the nvgpu_aperture_mask() function to correctly program the required aperture target for the perf's inst block bind operation. JIRA NVGPU-990 Change-Id: If6f09a743ee2ad47a6dbfa28cb7c61f1461fd8a7 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1796388 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c8
-rw-r--r--drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c7
2 files changed, 11 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 14ae17ea..4aeeec1c 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -130,6 +130,7 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch,
130 struct gk20a_cs_snapshot_client *cs_client) 130 struct gk20a_cs_snapshot_client *cs_client)
131{ 131{
132 struct gk20a *g = ch->g; 132 struct gk20a *g = ch->g;
133 struct mm_gk20a *mm = &g->mm;
133 struct gr_gk20a *gr = &g->gr; 134 struct gr_gk20a *gr = &g->gr;
134 struct gk20a_cs_snapshot *data = gr->cs_data; 135 struct gk20a_cs_snapshot *data = gr->cs_data;
135 u32 snapshot_size = cs_client->snapshot_size; 136 u32 snapshot_size = cs_client->snapshot_size;
@@ -185,8 +186,11 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch,
185 * should be written last */ 186 * should be written last */
186 gk20a_writel(g, perf_pmasys_mem_block_r(), 187 gk20a_writel(g, perf_pmasys_mem_block_r(),
187 perf_pmasys_mem_block_base_f(inst_pa_page) | 188 perf_pmasys_mem_block_base_f(inst_pa_page) |
188 perf_pmasys_mem_block_valid_true_f() | 189 nvgpu_aperture_mask(g, &mm->hwpm.inst_block,
189 perf_pmasys_mem_block_target_lfb_f()); 190 perf_pmasys_mem_block_target_sys_ncoh_f(),
191 perf_pmasys_mem_block_target_sys_coh_f(),
192 perf_pmasys_mem_block_target_lfb_f()) |
193 perf_pmasys_mem_block_valid_true_f());
190 194
191 nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots enabled\n"); 195 nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots enabled\n");
192 196
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 65bf2976..8307081e 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -385,8 +385,11 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
385 * should be written last */ 385 * should be written last */
386 gk20a_writel(g, perf_pmasys_mem_block_r(), 386 gk20a_writel(g, perf_pmasys_mem_block_r(),
387 perf_pmasys_mem_block_base_f(inst_pa_page) | 387 perf_pmasys_mem_block_base_f(inst_pa_page) |
388 perf_pmasys_mem_block_valid_true_f() | 388 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
389 perf_pmasys_mem_block_target_lfb_f()); 389 perf_pmasys_mem_block_target_sys_ncoh_f(),
390 perf_pmasys_mem_block_target_sys_coh_f(),
391 perf_pmasys_mem_block_target_lfb_f()) |
392 perf_pmasys_mem_block_valid_true_f());
390 393
391 gk20a_idle(g); 394 gk20a_idle(g);
392 return 0; 395 return 0;