From 8432f6d80a0590e8ac10849443ac27351471505e Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Mon, 6 Jun 2016 16:22:48 +0300 Subject: gpu: nvgpu: cache whole bar0_window for mem accesses Save the whole bar0 window register, which also encodes the target aperture (vid/sys mem), instead of only the base address, which could be the same for the two apertures. JIRA DNVGPU-23 Change-Id: I2ccbea0e1f7c7310c1ca6b158afafe8fd974a615 Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1159523 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 36 ++++++++++++++++++++---------------- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 ++-- 2 files changed, 22 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index ec946fb6..6505015f 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -84,28 +84,31 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) mem->cpu_va = NULL; } -/* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */ +/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) { u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); u64 addr = bufbase + w * sizeof(u32); u32 hi = (u32)((addr & ~(u64)0xfffff) >> bus_bar0_window_target_bar0_window_base_shift_v()); - u32 lo = (addr & 0xfffff); + u32 lo = (u32)(addr & 0xfffff); + u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ? 
+ bus_bar0_window_target_sys_mem_noncoherent_f() : + bus_bar0_window_target_vid_mem_f()) | + bus_bar0_window_base_f(hi); - gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem); + gk20a_dbg(gpu_dbg_mem, + "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)", + hi, lo, mem, bufbase, bufbase + mem->size, mem->size); WARN_ON(!bufbase); - spin_lock(&g->mm.pramin_base_lock); - if (g->mm.pramin_base != hi) { - gk20a_writel(g, bus_bar0_window_r(), - (g->mm.vidmem_is_vidmem - && mem->aperture == APERTURE_SYSMEM ? - bus_bar0_window_target_sys_mem_noncoherent_f() : - bus_bar0_window_target_vid_mem_f()) | - bus_bar0_window_base_f(hi)); + + spin_lock(&g->mm.pramin_window_lock); + + if (g->mm.pramin_window != win) { + gk20a_writel(g, bus_bar0_window_r(), win); gk20a_readl(g, bus_bar0_window_r()); - g->mm.pramin_base = hi; + g->mm.pramin_window = win; } return lo; @@ -114,7 +117,8 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem) { gk20a_dbg(gpu_dbg_mem, "end for %p", mem); - spin_unlock(&g->mm.pramin_base_lock); + + spin_unlock(&g->mm.pramin_window_lock); } u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) @@ -174,7 +178,7 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { u32 addr = gk20a_pramin_enter(g, mem, w); gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data); - /* read back to synchronize accesses*/ + /* read back to synchronize accesses */ gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); gk20a_pramin_exit(g, mem); } else { @@ -604,8 +608,8 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g) static void gk20a_init_pramin(struct mm_gk20a *mm) { - mm->pramin_base = 0; - spin_lock_init(&mm->pramin_base_lock); + mm->pramin_window = 0; + spin_lock_init(&mm->pramin_window_lock); mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; } diff --git 
a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index c58a4fec..e83e1111 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -365,8 +365,8 @@ struct mm_gk20a { struct mem_desc sysmem_flush; - u32 pramin_base; - spinlock_t pramin_base_lock; + u32 pramin_window; + spinlock_t pramin_window_lock; #if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) u32 force_pramin; /* via debugfs */ #else -- cgit v1.2.2