summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
diff options
context:
space:
mode:
author Konsta Holtta <kholtta@nvidia.com> 2016-06-17 08:51:02 -0400
committer Terje Bergstrom <tbergstrom@nvidia.com> 2016-07-05 02:10:59 -0400
commit e12c5c8594c429357427130389da632284d79bcc (patch)
tree 453043237ef411370a02ec03c6857b63480c019b /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent cd5a1dc315abd0a7db4136ee0e6b0c03f0882937 (diff)
gpu: nvgpu: initial support for vidmem apertures
add gk20a_aperture_mask() for memory target selection now that buffers can actually be allocated from vidmem, and use it in all cases that have a mem_desc available.

Jira DNVGPU-76

Change-Id: I4353cdc6e1e79488f0875581cfaf2a5cfb8c976a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169306
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mm_gk20a.c 74
1 files changed, 49 insertions, 25 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index e0b9a720..a274820a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -92,10 +92,10 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
92 u32 hi = (u32)((addr & ~(u64)0xfffff) 92 u32 hi = (u32)((addr & ~(u64)0xfffff)
93 >> bus_bar0_window_target_bar0_window_base_shift_v()); 93 >> bus_bar0_window_target_bar0_window_base_shift_v());
94 u32 lo = (u32)(addr & 0xfffff); 94 u32 lo = (u32)(addr & 0xfffff);
95 u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ? 95 u32 win = gk20a_aperture_mask(g, mem,
96 bus_bar0_window_target_sys_mem_noncoherent_f() : 96 bus_bar0_window_target_sys_mem_noncoherent_f(),
97 bus_bar0_window_target_vid_mem_f()) | 97 bus_bar0_window_target_vid_mem_f()) |
98 bus_bar0_window_base_f(hi); 98 bus_bar0_window_base_f(hi);
99 99
100 gk20a_dbg(gpu_dbg_mem, 100 gk20a_dbg(gpu_dbg_mem,
101 "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)", 101 "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
@@ -817,8 +817,6 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
817int gk20a_init_mm_setup_hw(struct gk20a *g) 817int gk20a_init_mm_setup_hw(struct gk20a *g)
818{ 818{
819 struct mm_gk20a *mm = &g->mm; 819 struct mm_gk20a *mm = &g->mm;
820 struct mem_desc *inst_block = &mm->bar1.inst_block;
821 u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block);
822 int err; 820 int err;
823 821
824 gk20a_dbg_fn(""); 822 gk20a_dbg_fn("");
@@ -832,7 +830,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
832 g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) 830 g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
833 >> 8); 831 >> 8);
834 832
835 g->ops.mm.bar1_bind(g, inst_pa); 833 g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
836 834
837 if (g->ops.mm.init_bar2_mm_hw_setup) { 835 if (g->ops.mm.init_bar2_mm_hw_setup) {
838 err = g->ops.mm.init_bar2_mm_hw_setup(g); 836 err = g->ops.mm.init_bar2_mm_hw_setup(g);
@@ -847,17 +845,19 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
847 return 0; 845 return 0;
848} 846}
849 847
850static int gk20a_mm_bar1_bind(struct gk20a *g, u64 bar1_iova) 848static int gk20a_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst)
851{ 849{
852 u64 inst_pa = (u32)(bar1_iova >> bar1_instance_block_shift_gk20a()); 850 u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
853 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); 851 u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a());
852
853 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
854 854
855 gk20a_writel(g, bus_bar1_block_r(), 855 gk20a_writel(g, bus_bar1_block_r(),
856 (g->mm.vidmem_is_vidmem ? 856 gk20a_aperture_mask(g, bar1_inst,
857 bus_bar1_block_target_sys_mem_ncoh_f() : 857 bus_bar1_block_target_sys_mem_ncoh_f(),
858 bus_bar1_block_target_vid_mem_f()) | 858 bus_bar1_block_target_vid_mem_f()) |
859 bus_bar1_block_mode_virtual_f() | 859 bus_bar1_block_mode_virtual_f() |
860 bus_bar1_block_ptr_f(inst_pa)); 860 bus_bar1_block_ptr_f(ptr_v));
861 861
862 return 0; 862 return 0;
863} 863}
@@ -2559,6 +2559,29 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
2559 return gk20a_gmmu_free_attr(g, 0, mem); 2559 return gk20a_gmmu_free_attr(g, 0, mem);
2560} 2560}
2561 2561
2562u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
2563 u32 sysmem_mask, u32 vidmem_mask)
2564{
2565 switch (aperture) {
2566 case APERTURE_SYSMEM:
2567 /* sysmem for dgpus; some igpus consider system memory vidmem */
2568 return g->mm.vidmem_is_vidmem ? sysmem_mask : vidmem_mask;
2569 case APERTURE_VIDMEM:
2570 /* for dgpus only */
2571 return vidmem_mask;
2572 case APERTURE_INVALID:
2573 WARN_ON("Bad aperture");
2574 }
2575 return 0;
2576}
2577
2578u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem,
2579 u32 sysmem_mask, u32 vidmem_mask)
2580{
2581 return __gk20a_aperture_mask(g, mem->aperture,
2582 sysmem_mask, vidmem_mask);
2583}
2584
2562int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) 2585int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem)
2563{ 2586{
2564 return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem); 2587 return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem);
@@ -4049,19 +4072,23 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
4049 false, false, "cde"); 4072 false, false, "cde");
4050} 4073}
4051 4074
4052void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr) 4075void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block,
4076 struct vm_gk20a *vm)
4053{ 4077{
4078 u64 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
4054 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); 4079 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
4055 u32 pdb_addr_hi = u64_hi32(pdb_addr); 4080 u32 pdb_addr_hi = u64_hi32(pdb_addr);
4056 4081
4057 gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), 4082 gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
4058 (g->mm.vidmem_is_vidmem ? 4083
4059 ram_in_page_dir_base_target_sys_mem_ncoh_f() : 4084 gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
4085 gk20a_aperture_mask(g, &vm->pdb.mem,
4086 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
4060 ram_in_page_dir_base_target_vid_mem_f()) | 4087 ram_in_page_dir_base_target_vid_mem_f()) |
4061 ram_in_page_dir_base_vol_true_f() | 4088 ram_in_page_dir_base_vol_true_f() |
4062 ram_in_page_dir_base_lo_f(pdb_addr_lo)); 4089 ram_in_page_dir_base_lo_f(pdb_addr_lo));
4063 4090
4064 gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), 4091 gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
4065 ram_in_page_dir_base_hi_f(pdb_addr_hi)); 4092 ram_in_page_dir_base_hi_f(pdb_addr_hi));
4066} 4093}
4067 4094
@@ -4069,14 +4096,11 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
4069 u32 big_page_size) 4096 u32 big_page_size)
4070{ 4097{
4071 struct gk20a *g = gk20a_from_vm(vm); 4098 struct gk20a *g = gk20a_from_vm(vm);
4072 u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
4073 4099
4074 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", 4100 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
4075 gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); 4101 gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
4076 4102
4077 gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); 4103 g->ops.mm.init_pdb(g, inst_block, vm);
4078
4079 g->ops.mm.init_pdb(g, inst_block, pde_addr);
4080 4104
4081 gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), 4105 gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
4082 u64_lo32(vm->va_limit - 1) & ~0xfff); 4106 u64_lo32(vm->va_limit - 1) & ~0xfff);
@@ -4311,7 +4335,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
4311void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) 4335void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
4312{ 4336{
4313 struct gk20a *g = gk20a_from_vm(vm); 4337 struct gk20a *g = gk20a_from_vm(vm);
4314 u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(vm->mm->g, 4338 u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(g,
4315 vm->pdb.mem.sgt->sgl, 0) >> 12); 4339 vm->pdb.mem.sgt->sgl, 0) >> 12);
4316 u32 data; 4340 u32 data;
4317 s32 retry = 2000; 4341 s32 retry = 2000;
@@ -4348,8 +4372,8 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
4348 4372
4349 gk20a_writel(g, fb_mmu_invalidate_pdb_r(), 4373 gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
4350 fb_mmu_invalidate_pdb_addr_f(addr_lo) | 4374 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
4351 (g->mm.vidmem_is_vidmem ? 4375 gk20a_aperture_mask(g, &vm->pdb.mem,
4352 fb_mmu_invalidate_pdb_aperture_sys_mem_f() : 4376 fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
4353 fb_mmu_invalidate_pdb_aperture_vid_mem_f())); 4377 fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
4354 4378
4355 gk20a_writel(g, fb_mmu_invalidate_r(), 4379 gk20a_writel(g, fb_mmu_invalidate_r(),