diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-06-06 09:22:48 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-06-07 12:24:14 -0400 |
commit | 8432f6d80a0590e8ac10849443ac27351471505e (patch) | |
tree | 34e61ffc7ffe6db33a102d0d0188deb0b0e582a3 /drivers/gpu/nvgpu/gk20a | |
parent | 697c8c1a98f09906c6513a0ed62052a375e83dc9 (diff) |
gpu: nvgpu: cache whole bar0_window for mem accesses
Save the whole bar0 window register value, which also encodes the target
aperture (vid/sys mem), instead of only the base address, because the same
base address can occur in both apertures.
JIRA DNVGPU-23
Change-Id: I2ccbea0e1f7c7310c1ca6b158afafe8fd974a615
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1159523
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 36 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 |
2 files changed, 22 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index ec946fb6..6505015f 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -84,28 +84,31 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | |||
84 | mem->cpu_va = NULL; | 84 | mem->cpu_va = NULL; |
85 | } | 85 | } |
86 | 86 | ||
87 | /* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */ | 87 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ |
88 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) | 88 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) |
89 | { | 89 | { |
90 | u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | 90 | u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); |
91 | u64 addr = bufbase + w * sizeof(u32); | 91 | u64 addr = bufbase + w * sizeof(u32); |
92 | u32 hi = (u32)((addr & ~(u64)0xfffff) | 92 | u32 hi = (u32)((addr & ~(u64)0xfffff) |
93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | 93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); |
94 | u32 lo = (addr & 0xfffff); | 94 | u32 lo = (u32)(addr & 0xfffff); |
95 | u32 win = (g->mm.vidmem_is_vidmem && mem->aperture == APERTURE_SYSMEM ? | ||
96 | bus_bar0_window_target_sys_mem_noncoherent_f() : | ||
97 | bus_bar0_window_target_vid_mem_f()) | | ||
98 | bus_bar0_window_base_f(hi); | ||
95 | 99 | ||
96 | gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem); | 100 | gk20a_dbg(gpu_dbg_mem, |
101 | "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)", | ||
102 | hi, lo, mem, bufbase, bufbase + mem->size, mem->size); | ||
97 | 103 | ||
98 | WARN_ON(!bufbase); | 104 | WARN_ON(!bufbase); |
99 | spin_lock(&g->mm.pramin_base_lock); | 105 | |
100 | if (g->mm.pramin_base != hi) { | 106 | spin_lock(&g->mm.pramin_window_lock); |
101 | gk20a_writel(g, bus_bar0_window_r(), | 107 | |
102 | (g->mm.vidmem_is_vidmem | 108 | if (g->mm.pramin_window != win) { |
103 | && mem->aperture == APERTURE_SYSMEM ? | 109 | gk20a_writel(g, bus_bar0_window_r(), win); |
104 | bus_bar0_window_target_sys_mem_noncoherent_f() : | ||
105 | bus_bar0_window_target_vid_mem_f()) | | ||
106 | bus_bar0_window_base_f(hi)); | ||
107 | gk20a_readl(g, bus_bar0_window_r()); | 110 | gk20a_readl(g, bus_bar0_window_r()); |
108 | g->mm.pramin_base = hi; | 111 | g->mm.pramin_window = win; |
109 | } | 112 | } |
110 | 113 | ||
111 | return lo; | 114 | return lo; |
@@ -114,7 +117,8 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) | |||
114 | static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem) | 117 | static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem) |
115 | { | 118 | { |
116 | gk20a_dbg(gpu_dbg_mem, "end for %p", mem); | 119 | gk20a_dbg(gpu_dbg_mem, "end for %p", mem); |
117 | spin_unlock(&g->mm.pramin_base_lock); | 120 | |
121 | spin_unlock(&g->mm.pramin_window_lock); | ||
118 | } | 122 | } |
119 | 123 | ||
120 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | 124 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) |
@@ -174,7 +178,7 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | |||
174 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | 178 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { |
175 | u32 addr = gk20a_pramin_enter(g, mem, w); | 179 | u32 addr = gk20a_pramin_enter(g, mem, w); |
176 | gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data); | 180 | gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data); |
177 | /* read back to synchronize accesses*/ | 181 | /* read back to synchronize accesses */ |
178 | gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); | 182 | gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); |
179 | gk20a_pramin_exit(g, mem); | 183 | gk20a_pramin_exit(g, mem); |
180 | } else { | 184 | } else { |
@@ -604,8 +608,8 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g) | |||
604 | 608 | ||
605 | static void gk20a_init_pramin(struct mm_gk20a *mm) | 609 | static void gk20a_init_pramin(struct mm_gk20a *mm) |
606 | { | 610 | { |
607 | mm->pramin_base = 0; | 611 | mm->pramin_window = 0; |
608 | spin_lock_init(&mm->pramin_base_lock); | 612 | spin_lock_init(&mm->pramin_window_lock); |
609 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; | 613 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; |
610 | } | 614 | } |
611 | 615 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index c58a4fec..e83e1111 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -365,8 +365,8 @@ struct mm_gk20a { | |||
365 | 365 | ||
366 | struct mem_desc sysmem_flush; | 366 | struct mem_desc sysmem_flush; |
367 | 367 | ||
368 | u32 pramin_base; | 368 | u32 pramin_window; |
369 | spinlock_t pramin_base_lock; | 369 | spinlock_t pramin_window_lock; |
370 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) | 370 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) |
371 | u32 force_pramin; /* via debugfs */ | 371 | u32 force_pramin; /* via debugfs */ |
372 | #else | 372 | #else |