summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2018-02-09 14:57:54 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-02-27 19:03:43 -0500
commit1170687c33f7506f39aaf47acee5430233e3d1a8 (patch)
treeb41146a14a78eba99b9f326ef63efbe8ba77caab /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent71f53272b28b1086b3f34e5e255815c37504ac2c (diff)
gpu: nvgpu: Use coherent aperture flag
When using a coherent DMA API we must make sure to program any aperture fields with the coherent aperture setting. To do this the nvgpu_aperture_mask() function was modified to take a third aperture mask argument, a coherent setting, so that code can use this function to generate coherent aperture settings. The aperture choice is somewhat tricky: the default version of this function uses the state of the DMA API to determine what aperture to use for SYSMEM: either coherent or non-coherent internally. Thus a kernel user need only specify the normal nvgpu_mem struct and the correct mask should be chosen. Due to many uses of nvgpu_mem structs not created directly from the DMA API wrapper it's easier to translate SYSMEM to SYSMEM_COH after creation. However, the GMMU mapping code will encounter buffers from userspace with different coherency attributes than the DMA API. Thus the __nvgpu_aperture_mask() really respects the aperture setting passed in regardless of the DMA API state. This aperture setting is pulled from NVGPU_VM_MAP_IO_COHERENT since this is either passed in from userspace or set by the kernel when using coherent DMA. The aperture field in attrs is upgraded to coh if this flag is set. This change also adds a coherent sysmem mask everywhere that it can. There are a couple of places that do not have a coherent register field defined yet. These need to eventually be defined and added. Lastly the aperture mask code has been moved from the Linux vm.c code to the general vm.c code since this function has no Linux dependencies. Note: depends on https://git-master.nvidia.com/r/1664536 for new register fields. JIRA EVLR-2333 Change-Id: I4b347911ecb7c511738563fe6c34d0e6aa380d71 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1655220 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c27
1 files changed, 16 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 6ae743ef..2cde10ec 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -745,8 +745,9 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
745 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> 745 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
746 ram_in_base_shift_v(); 746 ram_in_base_shift_v();
747 u32 aperture = nvgpu_aperture_mask(g, inst_block, 747 u32 aperture = nvgpu_aperture_mask(g, inst_block,
748 gr_fecs_current_ctx_target_sys_mem_ncoh_f(), 748 gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
749 gr_fecs_current_ctx_target_vid_mem_f()); 749 gr_fecs_current_ctx_target_sys_mem_coh_f(),
750 gr_fecs_current_ctx_target_vid_mem_f());
750 751
751 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture | 752 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
752 gr_fecs_current_ctx_valid_f(1); 753 gr_fecs_current_ctx_valid_f(1);
@@ -2171,16 +2172,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
2171 2172
2172 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); 2173 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
2173 gk20a_writel(g, gr_fecs_new_ctx_r(), 2174 gk20a_writel(g, gr_fecs_new_ctx_r(),
2174 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | 2175 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
2175 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2176 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2176 gr_fecs_new_ctx_target_sys_mem_ncoh_f(), 2177 gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
2178 gr_fecs_new_ctx_target_sys_mem_coh_f(),
2177 gr_fecs_new_ctx_target_vid_mem_f()) | 2179 gr_fecs_new_ctx_target_vid_mem_f()) |
2178 gr_fecs_new_ctx_valid_m()); 2180 gr_fecs_new_ctx_valid_m());
2179 2181
2180 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), 2182 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
2181 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | 2183 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
2182 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2184 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2183 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), 2185 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
2186 gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
2184 gr_fecs_arb_ctx_ptr_target_vid_mem_f())); 2187 gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
2185 2188
2186 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); 2189 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
@@ -4379,8 +4382,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4379 4382
4380 gk20a_writel(g, fb_mmu_debug_wr_r(), 4383 gk20a_writel(g, fb_mmu_debug_wr_r(),
4381 nvgpu_aperture_mask(g, &gr->mmu_wr_mem, 4384 nvgpu_aperture_mask(g, &gr->mmu_wr_mem,
4382 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), 4385 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
4383 fb_mmu_debug_wr_aperture_vid_mem_f()) | 4386 fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
4387 fb_mmu_debug_wr_aperture_vid_mem_f()) |
4384 fb_mmu_debug_wr_vol_false_f() | 4388 fb_mmu_debug_wr_vol_false_f() |
4385 fb_mmu_debug_wr_addr_f(addr)); 4389 fb_mmu_debug_wr_addr_f(addr));
4386 4390
@@ -4389,8 +4393,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4389 4393
4390 gk20a_writel(g, fb_mmu_debug_rd_r(), 4394 gk20a_writel(g, fb_mmu_debug_rd_r(),
4391 nvgpu_aperture_mask(g, &gr->mmu_rd_mem, 4395 nvgpu_aperture_mask(g, &gr->mmu_rd_mem,
4392 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), 4396 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
4393 fb_mmu_debug_rd_aperture_vid_mem_f()) | 4397 fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
4398 fb_mmu_debug_rd_aperture_vid_mem_f()) |
4394 fb_mmu_debug_rd_vol_false_f() | 4399 fb_mmu_debug_rd_vol_false_f() |
4395 fb_mmu_debug_rd_addr_f(addr)); 4400 fb_mmu_debug_rd_addr_f(addr));
4396 4401