diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-06-17 08:51:02 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-07-05 02:10:59 -0400 |
commit | e12c5c8594c429357427130389da632284d79bcc (patch) | |
tree | 453043237ef411370a02ec03c6857b63480c019b /drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |
parent | cd5a1dc315abd0a7db4136ee0e6b0c03f0882937 (diff) |
gpu: nvgpu: initial support for vidmem apertures
Add gk20a_aperture_mask() for memory target selection, now that buffers
can actually be allocated from vidmem, and use it in every case that has
a mem_desc available.
Jira DNVGPU-76
Change-Id: I4353cdc6e1e79488f0875581cfaf2a5cfb8c976a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169306
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 74 |
1 file changed, 49 insertions, 25 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index e0b9a720..a274820a 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -92,10 +92,10 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) | |||
92 | u32 hi = (u32)((addr & ~(u64)0xfffff) | 92 | u32 hi = (u32)((addr & ~(u64)0xfffff) |
93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | 93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); |
94 | u32 lo = (u32)(addr & 0xfffff); | 94 | u32 lo = (u32)(addr & 0xfffff); |
95 | u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ? | 95 | u32 win = gk20a_aperture_mask(g, mem, |
96 | bus_bar0_window_target_sys_mem_noncoherent_f() : | 96 | bus_bar0_window_target_sys_mem_noncoherent_f(), |
97 | bus_bar0_window_target_vid_mem_f()) | | 97 | bus_bar0_window_target_vid_mem_f()) | |
98 | bus_bar0_window_base_f(hi); | 98 | bus_bar0_window_base_f(hi); |
99 | 99 | ||
100 | gk20a_dbg(gpu_dbg_mem, | 100 | gk20a_dbg(gpu_dbg_mem, |
101 | "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)", | 101 | "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)", |
@@ -817,8 +817,6 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
817 | int gk20a_init_mm_setup_hw(struct gk20a *g) | 817 | int gk20a_init_mm_setup_hw(struct gk20a *g) |
818 | { | 818 | { |
819 | struct mm_gk20a *mm = &g->mm; | 819 | struct mm_gk20a *mm = &g->mm; |
820 | struct mem_desc *inst_block = &mm->bar1.inst_block; | ||
821 | u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block); | ||
822 | int err; | 820 | int err; |
823 | 821 | ||
824 | gk20a_dbg_fn(""); | 822 | gk20a_dbg_fn(""); |
@@ -832,7 +830,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) | |||
832 | g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) | 830 | g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) |
833 | >> 8); | 831 | >> 8); |
834 | 832 | ||
835 | g->ops.mm.bar1_bind(g, inst_pa); | 833 | g->ops.mm.bar1_bind(g, &mm->bar1.inst_block); |
836 | 834 | ||
837 | if (g->ops.mm.init_bar2_mm_hw_setup) { | 835 | if (g->ops.mm.init_bar2_mm_hw_setup) { |
838 | err = g->ops.mm.init_bar2_mm_hw_setup(g); | 836 | err = g->ops.mm.init_bar2_mm_hw_setup(g); |
@@ -847,17 +845,19 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) | |||
847 | return 0; | 845 | return 0; |
848 | } | 846 | } |
849 | 847 | ||
850 | static int gk20a_mm_bar1_bind(struct gk20a *g, u64 bar1_iova) | 848 | static int gk20a_mm_bar1_bind(struct gk20a *g, struct mem_desc *bar1_inst) |
851 | { | 849 | { |
852 | u64 inst_pa = (u32)(bar1_iova >> bar1_instance_block_shift_gk20a()); | 850 | u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); |
853 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); | 851 | u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); |
852 | |||
853 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); | ||
854 | 854 | ||
855 | gk20a_writel(g, bus_bar1_block_r(), | 855 | gk20a_writel(g, bus_bar1_block_r(), |
856 | (g->mm.vidmem_is_vidmem ? | 856 | gk20a_aperture_mask(g, bar1_inst, |
857 | bus_bar1_block_target_sys_mem_ncoh_f() : | 857 | bus_bar1_block_target_sys_mem_ncoh_f(), |
858 | bus_bar1_block_target_vid_mem_f()) | | 858 | bus_bar1_block_target_vid_mem_f()) | |
859 | bus_bar1_block_mode_virtual_f() | | 859 | bus_bar1_block_mode_virtual_f() | |
860 | bus_bar1_block_ptr_f(inst_pa)); | 860 | bus_bar1_block_ptr_f(ptr_v)); |
861 | 861 | ||
862 | return 0; | 862 | return 0; |
863 | } | 863 | } |
@@ -2559,6 +2559,29 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | |||
2559 | return gk20a_gmmu_free_attr(g, 0, mem); | 2559 | return gk20a_gmmu_free_attr(g, 0, mem); |
2560 | } | 2560 | } |
2561 | 2561 | ||
2562 | u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture, | ||
2563 | u32 sysmem_mask, u32 vidmem_mask) | ||
2564 | { | ||
2565 | switch (aperture) { | ||
2566 | case APERTURE_SYSMEM: | ||
2567 | /* sysmem for dgpus; some igpus consider system memory vidmem */ | ||
2568 | return g->mm.vidmem_is_vidmem ? sysmem_mask : vidmem_mask; | ||
2569 | case APERTURE_VIDMEM: | ||
2570 | /* for dgpus only */ | ||
2571 | return vidmem_mask; | ||
2572 | case APERTURE_INVALID: | ||
2573 | WARN_ON("Bad aperture"); | ||
2574 | } | ||
2575 | return 0; | ||
2576 | } | ||
2577 | |||
2578 | u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem, | ||
2579 | u32 sysmem_mask, u32 vidmem_mask) | ||
2580 | { | ||
2581 | return __gk20a_aperture_mask(g, mem->aperture, | ||
2582 | sysmem_mask, vidmem_mask); | ||
2583 | } | ||
2584 | |||
2562 | int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) | 2585 | int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) |
2563 | { | 2586 | { |
2564 | return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem); | 2587 | return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem); |
@@ -4049,19 +4072,23 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm) | |||
4049 | false, false, "cde"); | 4072 | false, false, "cde"); |
4050 | } | 4073 | } |
4051 | 4074 | ||
4052 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr) | 4075 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block, |
4076 | struct vm_gk20a *vm) | ||
4053 | { | 4077 | { |
4078 | u64 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); | ||
4054 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 4079 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
4055 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | 4080 | u32 pdb_addr_hi = u64_hi32(pdb_addr); |
4056 | 4081 | ||
4057 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), | 4082 | gk20a_dbg_info("pde pa=0x%llx", pdb_addr); |
4058 | (g->mm.vidmem_is_vidmem ? | 4083 | |
4059 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : | 4084 | gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), |
4085 | gk20a_aperture_mask(g, &vm->pdb.mem, | ||
4086 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), | ||
4060 | ram_in_page_dir_base_target_vid_mem_f()) | | 4087 | ram_in_page_dir_base_target_vid_mem_f()) | |
4061 | ram_in_page_dir_base_vol_true_f() | | 4088 | ram_in_page_dir_base_vol_true_f() | |
4062 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | 4089 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); |
4063 | 4090 | ||
4064 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), | 4091 | gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), |
4065 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 4092 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
4066 | } | 4093 | } |
4067 | 4094 | ||
@@ -4069,14 +4096,11 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, | |||
4069 | u32 big_page_size) | 4096 | u32 big_page_size) |
4070 | { | 4097 | { |
4071 | struct gk20a *g = gk20a_from_vm(vm); | 4098 | struct gk20a *g = gk20a_from_vm(vm); |
4072 | u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); | ||
4073 | 4099 | ||
4074 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", | 4100 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", |
4075 | gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); | 4101 | gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); |
4076 | 4102 | ||
4077 | gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); | 4103 | g->ops.mm.init_pdb(g, inst_block, vm); |
4078 | |||
4079 | g->ops.mm.init_pdb(g, inst_block, pde_addr); | ||
4080 | 4104 | ||
4081 | gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), | 4105 | gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), |
4082 | u64_lo32(vm->va_limit - 1) & ~0xfff); | 4106 | u64_lo32(vm->va_limit - 1) & ~0xfff); |
@@ -4311,7 +4335,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | |||
4311 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | 4335 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) |
4312 | { | 4336 | { |
4313 | struct gk20a *g = gk20a_from_vm(vm); | 4337 | struct gk20a *g = gk20a_from_vm(vm); |
4314 | u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(vm->mm->g, | 4338 | u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(g, |
4315 | vm->pdb.mem.sgt->sgl, 0) >> 12); | 4339 | vm->pdb.mem.sgt->sgl, 0) >> 12); |
4316 | u32 data; | 4340 | u32 data; |
4317 | s32 retry = 2000; | 4341 | s32 retry = 2000; |
@@ -4348,8 +4372,8 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | |||
4348 | 4372 | ||
4349 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), | 4373 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), |
4350 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | | 4374 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | |
4351 | (g->mm.vidmem_is_vidmem ? | 4375 | gk20a_aperture_mask(g, &vm->pdb.mem, |
4352 | fb_mmu_invalidate_pdb_aperture_sys_mem_f() : | 4376 | fb_mmu_invalidate_pdb_aperture_sys_mem_f(), |
4353 | fb_mmu_invalidate_pdb_aperture_vid_mem_f())); | 4377 | fb_mmu_invalidate_pdb_aperture_vid_mem_f())); |
4354 | 4378 | ||
4355 | gk20a_writel(g, fb_mmu_invalidate_r(), | 4379 | gk20a_writel(g, fb_mmu_invalidate_r(), |