gpu: nvgpu: cache whole bar0_window for mem accesses

Save the whole bar0 window register, which also encodes the target
aperture (vid/sys mem), instead of only the base address, which could
overlap between the two apertures.

JIRA DNVGPU-23

Change-Id: I2ccbea0e1f7c7310c1ca6b158afafe8fd974a615
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1159523
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: Konsta Holtta <kholtta@nvidia.com> 2016-06-06 09:22:48 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2016-06-07 12:24:14 -0400
commit: 8432f6d80a0590e8ac10849443ac27351471505e (patch)
tree: 34e61ffc7ffe6db33a102d0d0188deb0b0e582a3
parent: 697c8c1a98f09906c6513a0ed62052a375e83dc9 (diff)
2 files changed, 22 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index ec946fb6..6505015f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -84,28 +84,31 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
        mem->cpu_va = NULL;
 }
-/* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */
+/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
 static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 {
        u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
        u64 addr = bufbase + w * sizeof(u32);
        u32 hi = (u32)((addr & ~(u64)0xfffff)
                >> bus_bar0_window_target_bar0_window_base_shift_v());
-        u32 lo = (addr & 0xfffff);
+        u32 lo = (u32)(addr & 0xfffff);
+        u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ?
+                  bus_bar0_window_target_sys_mem_noncoherent_f() :
+                 bus_bar0_window_target_vid_mem_f()) |
+                 bus_bar0_window_base_f(hi);
-        gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem);
+        gk20a_dbg(gpu_dbg_mem,
+                        "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
+                        hi, lo, mem, bufbase, bufbase + mem->size, mem->size);
        WARN_ON(!bufbase);
-        spin_lock(&g->mm.pramin_base_lock);
-        if (g->mm.pramin_base != hi) {
+        spin_lock(&g->mm.pramin_window_lock);
-                gk20a_writel(g, bus_bar0_window_r(),
-                                (g->mm.vidmem_is_vidmem
+        if (g->mm.pramin_window != win) {
-                                 && mem->aperture == APERTURE_SYSMEM ?
+                gk20a_writel(g, bus_bar0_window_r(), win);
-                                 bus_bar0_window_target_sys_mem_noncoherent_f() :
-                                 bus_bar0_window_target_vid_mem_f()) |
-                                bus_bar0_window_base_f(hi));
                gk20a_readl(g, bus_bar0_window_r());
-                g->mm.pramin_base = hi;
+                g->mm.pramin_window = win;
        }
        return lo;
@@ -114,7 +117,8 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
 {
        gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
-        spin_unlock(&g->mm.pramin_base_lock);
+        spin_unlock(&g->mm.pramin_window_lock);
 }
 u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
@@ -174,7 +178,7 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
        } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
                u32 addr = gk20a_pramin_enter(g, mem, w);
                gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
-                /* read back to synchronize accesses*/
+                /* read back to synchronize accesses */
                gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
                gk20a_pramin_exit(g, mem);
        } else {
@@ -604,8 +608,8 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
 static void gk20a_init_pramin(struct mm_gk20a *mm)
 {
-        mm->pramin_base = 0;
+        mm->pramin_window = 0;
-        spin_lock_init(&mm->pramin_base_lock);
+        spin_lock_init(&mm->pramin_window_lock);
        mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index c58a4fec..e83e1111 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -365,8 +365,8 @@ struct mm_gk20a {
        struct mem_desc sysmem_flush;
-        u32 pramin_base;
+        u32 pramin_window;
-        spinlock_t pramin_base_lock;
+        spinlock_t pramin_window_lock;
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
        u32 force_pramin; /* via debugfs */
 #else
author	Konsta Holtta <kholtta@nvidia.com>	2016-06-06 09:22:48 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-06-07 12:24:14 -0400
commit	8432f6d80a0590e8ac10849443ac27351471505e (patch)
tree	34e61ffc7ffe6db33a102d0d0188deb0b0e582a3
parent	697c8c1a98f09906c6513a0ed62052a375e83dc9 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index ec946fb6..6505015f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -84,28 +84,31 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
84	mem->cpu_va = NULL;	84	mem->cpu_va = NULL;
85	}	85	}
86		86
87	/* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */	87	/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
88	static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)	88	static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
89	{	89	{
90	u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);	90	u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
91	u64 addr = bufbase + w * sizeof(u32);	91	u64 addr = bufbase + w * sizeof(u32);
92	u32 hi = (u32)((addr & ~(u64)0xfffff)	92	u32 hi = (u32)((addr & ~(u64)0xfffff)
93	>> bus_bar0_window_target_bar0_window_base_shift_v());	93	>> bus_bar0_window_target_bar0_window_base_shift_v());
94	u32 lo = (addr & 0xfffff);	94	u32 lo = (u32)(addr & 0xfffff);
		95	u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ?
		96	bus_bar0_window_target_sys_mem_noncoherent_f() :
		97	bus_bar0_window_target_vid_mem_f()) |
		98	bus_bar0_window_base_f(hi);
95		99
96	gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem);	100	gk20a_dbg(gpu_dbg_mem,
		101	"0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
		102	hi, lo, mem, bufbase, bufbase + mem->size, mem->size);
97		103
98	WARN_ON(!bufbase);	104	WARN_ON(!bufbase);
99	spin_lock(&g->mm.pramin_base_lock);	105
100	if (g->mm.pramin_base != hi) {	106	spin_lock(&g->mm.pramin_window_lock);
101	gk20a_writel(g, bus_bar0_window_r(),	107
102	(g->mm.vidmem_is_vidmem	108	if (g->mm.pramin_window != win) {
103	&& mem->aperture == APERTURE_SYSMEM ?	109	gk20a_writel(g, bus_bar0_window_r(), win);
104	bus_bar0_window_target_sys_mem_noncoherent_f() :
105	bus_bar0_window_target_vid_mem_f()) |
106	bus_bar0_window_base_f(hi));
107	gk20a_readl(g, bus_bar0_window_r());	110	gk20a_readl(g, bus_bar0_window_r());
108	g->mm.pramin_base = hi;	111	g->mm.pramin_window = win;
109	}	112	}
110		113
111	return lo;	114	return lo;
@@ -114,7 +117,8 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
114	static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)	117	static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
115	{	118	{
116	gk20a_dbg(gpu_dbg_mem, "end for %p", mem);	119	gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
117	spin_unlock(&g->mm.pramin_base_lock);	120
		121	spin_unlock(&g->mm.pramin_window_lock);
118	}	122	}
119		123
120	u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)	124	u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
@@ -174,7 +178,7 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
174	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {	178	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
175	u32 addr = gk20a_pramin_enter(g, mem, w);	179	u32 addr = gk20a_pramin_enter(g, mem, w);
176	gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);	180	gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
177	/* read back to synchronize accesses*/	181	/* read back to synchronize accesses */
178	gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));	182	gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
179	gk20a_pramin_exit(g, mem);	183	gk20a_pramin_exit(g, mem);
180	} else {	184	} else {
@@ -604,8 +608,8 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
604		608
605	static void gk20a_init_pramin(struct mm_gk20a *mm)	609	static void gk20a_init_pramin(struct mm_gk20a *mm)
606	{	610	{
607	mm->pramin_base = 0;	611	mm->pramin_window = 0;
608	spin_lock_init(&mm->pramin_base_lock);	612	spin_lock_init(&mm->pramin_window_lock);
609	mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;	613	mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
610	}	614	}
611		615


diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index c58a4fec..e83e1111 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -365,8 +365,8 @@ struct mm_gk20a {
365		365
366	struct mem_desc sysmem_flush;	366	struct mem_desc sysmem_flush;
367		367
368	u32 pramin_base;	368	u32 pramin_window;
369	spinlock_t pramin_base_lock;	369	spinlock_t pramin_window_lock;
370	#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)	370	#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
371	u32 force_pramin; /* via debugfs */	371	u32 force_pramin; /* via debugfs */
372	#else	372	#else