path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author	Deepak Nibade <dnibade@nvidia.com>	2016-08-23 04:58:28 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-09-01 12:11:53 -0400
commit	44c5b5877b9b1520f644d8516f7ee19125a832d5 (patch)
tree	46c73428a406ad41fa0c0c76f166956e50820a18 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent	93a436f581ced27efe2e600bb16b2e50b5696c12 (diff)
gpu: nvgpu: add new API to get base address for sysmem/vidmem buffers
Add new API gk20a_mem_get_base_addr() which will return the vidmem
base address in case of vidmem and the IOVA address in case of
sysmem.

Even though vidmem allocations are non-contiguous, this API is
useful (and should only be used) for allocations with one chunk
(e.g. page tables).

Also, since page tables could reside either in sysmem or in vidmem,
use this API to get the address of page tables.

Jira DNVGPU-20

Change-Id: Ie04af9ca7bfccfec1a8a8e4be2c507cef5cef8e1
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1206403
(cherry picked from commit a8c74dc188878f2948fa1e0e47bf1837fba6c5e0)
Reviewed-on: http://git-master/r/1210957
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
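As a usage sketch (not part of the patch; assumes the in-tree gk20a headers, and example_pt_base/pt_mem are hypothetical names): a caller that needs to program a page-table base address no longer has to care which aperture the table lives in:

	/* Hypothetical caller, sketched against the gk20a mm headers. */
	static u64 example_pt_base(struct gk20a *g, struct mem_desc *pt_mem)
	{
		/*
		 * For APERTURE_VIDMEM this returns the vidmem base of the
		 * single chunk; for sysmem it returns the IOVA. Only valid
		 * for one-chunk allocations such as page tables.
		 */
		return gk20a_mem_get_base_addr(g, pt_mem, 0);
	}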
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	36
1 file changed, 31 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index bcde1746..1124a17d 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -3058,6 +3058,31 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
 	return gk20a_gmmu_free_attr(g, 0, mem);
 }
 
+/*
+ * If mem is in VIDMEM, return base address in vidmem
+ * else return IOVA address for SYSMEM
+ */
+u64 gk20a_mem_get_base_addr(struct gk20a *g, struct mem_desc *mem,
+		u32 flags)
+{
+	struct gk20a_page_alloc *alloc;
+	u64 addr;
+
+	if (mem->aperture == APERTURE_VIDMEM) {
+		alloc = (struct gk20a_page_alloc *)
+			sg_dma_address(mem->sgt->sgl);
+
+		/* This API should not be used with > 1 chunks */
+		WARN_ON(alloc->nr_chunks != 1);
+
+		addr = alloc->base;
+	} else {
+		addr = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, flags);
+	}
+
+	return addr;
+}
+
 #if defined(CONFIG_GK20A_VIDMEM)
 static struct mem_desc *get_pending_mem_desc(struct mm_gk20a *mm)
 {
@@ -3341,7 +3366,7 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 static inline u32 big_valid_pde0_bits(struct gk20a *g,
 		struct mem_desc *entry_mem)
 {
-	u64 pte_addr = g->ops.mm.get_iova_addr(g, entry_mem->sgt->sgl, 0);
+	u64 pte_addr = gk20a_mem_get_base_addr(g, entry_mem, 0);
 	u32 pde0_bits =
 		gk20a_aperture_mask(g, entry_mem,
 			gmmu_pde_aperture_big_sys_mem_ncoh_f(),
@@ -3355,7 +3380,7 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
 static inline u32 small_valid_pde1_bits(struct gk20a *g,
 		struct mem_desc *entry_mem)
 {
-	u64 pte_addr = g->ops.mm.get_iova_addr(g, entry_mem->sgt->sgl, 0);
+	u64 pte_addr = gk20a_mem_get_base_addr(g, entry_mem, 0);
 	u32 pde1_bits =
 		gk20a_aperture_mask(g, entry_mem,
 			gmmu_pde_aperture_small_sys_mem_ncoh_f(),
@@ -4709,7 +4734,7 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm)
 void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block,
 		struct vm_gk20a *vm)
 {
-	u64 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
+	u64 pdb_addr = gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0);
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
@@ -4969,8 +4994,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(g,
-			vm->pdb.mem.sgt->sgl, 0) >> 12);
+	u32 addr_lo;
 	u32 data;
 	s32 retry = 2000;
 	static DEFINE_MUTEX(tlb_lock);
@@ -4986,6 +5010,8 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 	if (!g->power_on)
 		return;
 
+	addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0) >> 12);
+
 	mutex_lock(&tlb_lock);
 
 	trace_gk20a_mm_tlb_invalidate(dev_name(g->dev));
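One side effect of the last two hunks worth noting (an observation on the patch, not stated in the commit message): addr_lo is now computed after the power-on early return, so the PDB base lookup is skipped entirely when the GPU is off. A condensed view of the resulting flow, with unrelated lines elided:

	void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
	{
		struct gk20a *g = gk20a_from_vm(vm);
		u32 addr_lo;

		/* ... */
		if (!g->power_on)
			return;		/* no PDB lookup on a powered-off GPU */

		addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0) >> 12);

		mutex_lock(&tlb_lock);
		/* ... program and poll the invalidate registers ... */
	}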