From 8432f6d80a0590e8ac10849443ac27351471505e Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 6 Jun 2016 16:22:48 +0300
Subject: gpu: nvgpu: cache whole bar0_window for mem accesses

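Cache the whole bar0 window register value, which also encodes the
target aperture (vid/sys mem), instead of only the base address, which
can be the same for both apertures. With only the base cached, an
access that switched apertures at an unchanged base skipped the
register write.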

JIRA DNVGPU-23

Change-Id: I2ccbea0e1f7c7310c1ca6b158afafe8fd974a615
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1159523
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 36 ++++++++++++++++++++----------------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  4 ++--
 2 files changed, 22 insertions(+), 18 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index ec946fb6..6505015f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -84,28 +84,31 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
 	mem->cpu_va = NULL;
 }
 
-/* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */
+/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
 static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 {
 	u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
 	u64 addr = bufbase + w * sizeof(u32);
 	u32 hi = (u32)((addr & ~(u64)0xfffff)
 		>> bus_bar0_window_target_bar0_window_base_shift_v());
-	u32 lo = (addr & 0xfffff);
+	u32 lo = (u32)(addr & 0xfffff);
+	u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ?
+		  bus_bar0_window_target_sys_mem_noncoherent_f() :
+		 bus_bar0_window_target_vid_mem_f()) |
+		 bus_bar0_window_base_f(hi);
 
-	gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem);
+	gk20a_dbg(gpu_dbg_mem,
+			"0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
+			hi, lo, mem, bufbase, bufbase + mem->size, mem->size);
 
 	WARN_ON(!bufbase);
-	spin_lock(&g->mm.pramin_base_lock);
-	if (g->mm.pramin_base != hi) {
-		gk20a_writel(g, bus_bar0_window_r(),
-				(g->mm.vidmem_is_vidmem
-				 && mem->aperture == APERTURE_SYSMEM ?
-				 bus_bar0_window_target_sys_mem_noncoherent_f() :
-				 bus_bar0_window_target_vid_mem_f()) |
-				bus_bar0_window_base_f(hi));
+
+	spin_lock(&g->mm.pramin_window_lock);
+
+	if (g->mm.pramin_window != win) {
+		gk20a_writel(g, bus_bar0_window_r(), win);
 		gk20a_readl(g, bus_bar0_window_r());
-		g->mm.pramin_base = hi;
+		g->mm.pramin_window = win;
 	}
 
 	return lo;
@@ -114,7 +117,8 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
 {
 	gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
-	spin_unlock(&g->mm.pramin_base_lock);
+
+	spin_unlock(&g->mm.pramin_window_lock);
 }
 
 u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
@@ -174,7 +178,7 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
 		u32 addr = gk20a_pramin_enter(g, mem, w);
 		gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
-		/* read back to synchronize accesses*/
+		/* read back to synchronize accesses */
 		gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
 		gk20a_pramin_exit(g, mem);
 	} else {
@@ -604,8 +608,8 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
 
 static void gk20a_init_pramin(struct mm_gk20a *mm)
 {
-	mm->pramin_base = 0;
-	spin_lock_init(&mm->pramin_base_lock);
+	mm->pramin_window = 0;
+	spin_lock_init(&mm->pramin_window_lock);
 	mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index c58a4fec..e83e1111 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -365,8 +365,8 @@ struct mm_gk20a {
 
 	struct mem_desc sysmem_flush;
 
-	u32 pramin_base;
-	spinlock_t pramin_base_lock;
+	u32 pramin_window;
+	spinlock_t pramin_window_lock;
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
 	u32 force_pramin; /* via debugfs */
 #else
-- 
cgit v1.2.2