summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2016-06-06 09:22:48 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-06-07 12:24:14 -0400
commit8432f6d80a0590e8ac10849443ac27351471505e (patch)
tree34e61ffc7ffe6db33a102d0d0188deb0b0e582a3
parent697c8c1a98f09906c6513a0ed62052a375e83dc9 (diff)
gpu: nvgpu: cache whole bar0_window for mem accesses
Save the whole bar0 window register, which also encodes the target aperture (vid/sys mem), instead of only the base address, which could overlap between the two apertures.

JIRA DNVGPU-23
Change-Id: I2ccbea0e1f7c7310c1ca6b158afafe8fd974a615
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1159523
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c36
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h4
2 files changed, 22 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index ec946fb6..6505015f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -84,28 +84,31 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
84 mem->cpu_va = NULL; 84 mem->cpu_va = NULL;
85} 85}
86 86
87/* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */ 87/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
88static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) 88static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
89{ 89{
90 u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); 90 u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
91 u64 addr = bufbase + w * sizeof(u32); 91 u64 addr = bufbase + w * sizeof(u32);
92 u32 hi = (u32)((addr & ~(u64)0xfffff) 92 u32 hi = (u32)((addr & ~(u64)0xfffff)
93 >> bus_bar0_window_target_bar0_window_base_shift_v()); 93 >> bus_bar0_window_target_bar0_window_base_shift_v());
94 u32 lo = (addr & 0xfffff); 94 u32 lo = (u32)(addr & 0xfffff);
95 u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ?
96 bus_bar0_window_target_sys_mem_noncoherent_f() :
97 bus_bar0_window_target_vid_mem_f()) |
98 bus_bar0_window_base_f(hi);
95 99
96 gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem); 100 gk20a_dbg(gpu_dbg_mem,
101 "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
102 hi, lo, mem, bufbase, bufbase + mem->size, mem->size);
97 103
98 WARN_ON(!bufbase); 104 WARN_ON(!bufbase);
99 spin_lock(&g->mm.pramin_base_lock); 105
100 if (g->mm.pramin_base != hi) { 106 spin_lock(&g->mm.pramin_window_lock);
101 gk20a_writel(g, bus_bar0_window_r(), 107
102 (g->mm.vidmem_is_vidmem 108 if (g->mm.pramin_window != win) {
103 && mem->aperture == APERTURE_SYSMEM ? 109 gk20a_writel(g, bus_bar0_window_r(), win);
104 bus_bar0_window_target_sys_mem_noncoherent_f() :
105 bus_bar0_window_target_vid_mem_f()) |
106 bus_bar0_window_base_f(hi));
107 gk20a_readl(g, bus_bar0_window_r()); 110 gk20a_readl(g, bus_bar0_window_r());
108 g->mm.pramin_base = hi; 111 g->mm.pramin_window = win;
109 } 112 }
110 113
111 return lo; 114 return lo;
@@ -114,7 +117,8 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
114static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem) 117static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
115{ 118{
116 gk20a_dbg(gpu_dbg_mem, "end for %p", mem); 119 gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
117 spin_unlock(&g->mm.pramin_base_lock); 120
121 spin_unlock(&g->mm.pramin_window_lock);
118} 122}
119 123
120u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) 124u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
@@ -174,7 +178,7 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
174 } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { 178 } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
175 u32 addr = gk20a_pramin_enter(g, mem, w); 179 u32 addr = gk20a_pramin_enter(g, mem, w);
176 gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data); 180 gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
177 /* read back to synchronize accesses*/ 181 /* read back to synchronize accesses */
178 gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); 182 gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
179 gk20a_pramin_exit(g, mem); 183 gk20a_pramin_exit(g, mem);
180 } else { 184 } else {
@@ -604,8 +608,8 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
604 608
605static void gk20a_init_pramin(struct mm_gk20a *mm) 609static void gk20a_init_pramin(struct mm_gk20a *mm)
606{ 610{
607 mm->pramin_base = 0; 611 mm->pramin_window = 0;
608 spin_lock_init(&mm->pramin_base_lock); 612 spin_lock_init(&mm->pramin_window_lock);
609 mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; 613 mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
610} 614}
611 615
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index c58a4fec..e83e1111 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -365,8 +365,8 @@ struct mm_gk20a {
365 365
366 struct mem_desc sysmem_flush; 366 struct mem_desc sysmem_flush;
367 367
368 u32 pramin_base; 368 u32 pramin_window;
369 spinlock_t pramin_base_lock; 369 spinlock_t pramin_window_lock;
370#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) 370#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
371 u32 force_pramin; /* via debugfs */ 371 u32 force_pramin; /* via debugfs */
372#else 372#else