From 713f1ddcdfa5aec04bbb992e52a7d60fc68e966f Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Wed, 17 Aug 2016 17:47:41 +0530
Subject: gpu: nvgpu: support pramin access for non-contiguous vidmem

API pramin_access_batched() currently only supports contiguous
allocations. Modify this API to support non-contiguous allocations
from the page allocator as well.

Update gk20a_mem_wr32() and gk20a_mem_rd32() to reuse
pramin_access_batched().

Use gk20a_memset() in gk20a_gmmu_free_attr_vid() to clear vidmem
pages for kernel buffers.

Jira DNVGPU-30

Change-Id: I43630912f4837d8ebc6b9c58f4f427218ef9725b
Signed-off-by: Deepak Nibade
Reviewed-on: http://git-master/r/1204303
(cherry picked from commit 2f84f141d02fd2f641cb18a48896fb3ae5f7e51f)
Reviewed-on: http://git-master/r/1210954
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 139 +++++++++++++++++++------------------
 1 file changed, 72 insertions(+), 67 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 89390c30..179e6fc1 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -89,31 +89,11 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
         mem->cpu_va = NULL;
 }
 
-static u64 gk20a_mem_get_vidmem_addr(struct gk20a *g, struct mem_desc *mem)
-{
-        struct gk20a_page_alloc *alloc;
-        struct page_alloc_chunk *chunk;
-
-        if (mem && mem->aperture == APERTURE_VIDMEM) {
-                alloc = (struct gk20a_page_alloc *)
-                        sg_dma_address(mem->sgt->sgl);
-
-                /* This API should not be used with > 1 chunks */
-                if (alloc->nr_chunks != 1)
-                        return 0;
-
-                chunk = list_first_entry(&alloc->alloc_chunks,
-                                struct page_alloc_chunk, list_entry);
-                return chunk->base;
-        }
-
-        return 0;
-}
-
 /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
-static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
+static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem,
+                struct page_alloc_chunk *chunk, u32 w)
 {
-        u64 bufbase = gk20a_mem_get_vidmem_addr(g, mem);
+        u64 bufbase = chunk->base;
         u64 addr = bufbase + w * sizeof(u32);
         u32 hi = (u32)((addr & ~(u64)0xfffff)
                 >> bus_bar0_window_target_bar0_window_base_shift_v());
@@ -124,8 +104,9 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
                 bus_bar0_window_base_f(hi);
 
         gk20a_dbg(gpu_dbg_mem,
-                        "0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
-                        hi, lo, mem, bufbase, bufbase + mem->size, mem->size);
+                        "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
+                        hi, lo, mem, chunk, bufbase,
+                        bufbase + chunk->length, chunk->length);
 
         WARN_ON(!bufbase);
 
@@ -140,42 +121,14 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
         return lo;
 }
 
-static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
+static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem,
+                struct page_alloc_chunk *chunk)
 {
-        gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
+        gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk);
 
         spin_unlock(&g->mm.pramin_window_lock);
 }
 
-u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
-{
-        u32 data = 0;
-
-        if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
-                u32 *ptr = mem->cpu_va;
-
-                WARN_ON(!ptr);
-                data = ptr[w];
-#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
-                gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
-#endif
-        } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-                u32 addr = gk20a_pramin_enter(g, mem, w);
-                data = gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
-                gk20a_pramin_exit(g, mem);
-        } else {
-                WARN_ON("Accessing unallocated mem_desc");
-        }
-
-        return data;
-}
-
-u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
-{
-        WARN_ON(offset & 3);
-        return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
-}
-
 /*
  * Batch innerloop for the function below once per each PRAMIN range (some
  * 4B..1MB at a time). "start" reg goes as-is to gk20a_{readl,writel}.
@@ -191,22 +144,40 @@ typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
 static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem,
                 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
 {
+        struct gk20a_page_alloc *alloc = NULL;
+        struct page_alloc_chunk *chunk = NULL;
+        u32 byteoff, start_reg, until_end, n;
+
+        alloc = (struct gk20a_page_alloc *)sg_dma_address(mem->sgt->sgl);
+        list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
+                if (offset >= chunk->length)
+                        offset -= chunk->length;
+                else
+                        break;
+        }
+
         offset /= sizeof(u32);
 
         while (size) {
-                u32 byteoff = gk20a_pramin_enter(g, mem, offset);
-                u32 start_reg = pram_data032_r(byteoff / sizeof(u32));
-                u32 until_end = SZ_1M - (byteoff & (SZ_1M - 1));
-                u32 n = min(size, until_end);
+                byteoff = gk20a_pramin_enter(g, mem, chunk, offset);
+                start_reg = pram_data032_r(byteoff / sizeof(u32));
+                until_end = SZ_1M - (byteoff & (SZ_1M - 1));
+
+                n = min3(size, until_end, (u32)(chunk->length - offset));
 
                 loop(g, start_reg, n / sizeof(u32), arg);
 
                 /* read back to synchronize accesses */
                 gk20a_readl(g, start_reg);
 
-                gk20a_pramin_exit(g, mem);
+                gk20a_pramin_exit(g, mem, chunk);
 
                 offset += n / sizeof(u32);
                 size -= n;
+
+                if (n == (chunk->length - offset)) {
+                        chunk = list_next_entry(chunk, list_entry);
+                        offset = 0;
+                }
         }
 }
 
@@ -247,6 +218,40 @@ static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
         }
 }
 
+u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
+{
+        u32 data = 0;
+
+        if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
+                u32 *ptr = mem->cpu_va;
+
+                WARN_ON(!ptr);
+                data = ptr[w];
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+                gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
+#endif
+        } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
+                u32 value;
+                u32 *p = &value;
+
+                pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
+                                pramin_access_batch_rd_n, &p);
+
+                data = value;
+
+        } else {
+                WARN_ON("Accessing unallocated mem_desc");
+        }
+
+        return data;
+}
+
+u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
+{
+        WARN_ON(offset & 3);
+        return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
+}
+
 void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
                 u32 offset, void *dest, u32 size)
 {
@@ -284,11 +289,11 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 #endif
                 ptr[w] = data;
         } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-                u32 addr = gk20a_pramin_enter(g, mem, w);
-                gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
-                /* read back to synchronize accesses */
-                gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
-                gk20a_pramin_exit(g, mem);
+                u32 value = data;
+                u32 *p = &value;
+
+                pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
+                                pramin_access_batch_wr_n, &p);
         } else {
                 WARN_ON("Accessing unallocated mem_desc");
         }
@@ -3000,7 +3005,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
                         schedule_work(&g->mm.vidmem_clear_mem_worker);
                 }
         } else {
-                /* TODO: clear with PRAMIN here */
+                gk20a_memset(g, mem, 0, 0, mem->size);
                 gk20a_free(mem->allocator,
                         sg_dma_address(mem->sgt->sgl));
                 gk20a_free_sgtable(&mem->sgt);
--
cgit v1.2.2
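
As an illustration of the batching logic introduced above: each pass of the new
pramin_access_batched() loop clamps the transfer to the smallest of the remaining
request, the rest of the current 1 MB PRAMIN window, and the rest of the current
chunk, then moves on to the next chunk once the current one is exhausted. The
standalone sketch below (not part of the patch) models only that arithmetic; the
types and helpers (struct chunk, batched_access, min3u) are hypothetical stand-ins
for the driver's gk20a_page_alloc/page_alloc_chunk and its PRAMIN register
accessors, and offsets stay in bytes for simplicity, whereas the driver converts
to 32-bit word offsets before calling gk20a_pramin_enter().

/* Standalone sketch of the chunk/window splitting, build with: gcc -o sketch sketch.c */
#include <stdio.h>
#include <stdint.h>

#define SZ_1M (1024u * 1024u)

struct chunk {
        uint64_t base;          /* vidmem base address of this chunk */
        uint64_t length;        /* chunk length in bytes */
};

static uint32_t min3u(uint32_t a, uint32_t b, uint32_t c)
{
        uint32_t m = a < b ? a : b;

        return m < c ? m : c;
}

/* Walk a non-contiguous allocation and print each batched span. */
static void batched_access(struct chunk *chunks, int nr_chunks,
                uint32_t offset, uint32_t size)
{
        int i = 0;

        /* Skip whole chunks until 'offset' falls inside chunk i. */
        while (i < nr_chunks && offset >= chunks[i].length) {
                offset -= chunks[i].length;
                i++;
        }

        while (size && i < nr_chunks) {
                uint64_t addr = chunks[i].base + offset;
                /* Bytes left before the 1 MB PRAMIN window must be moved. */
                uint32_t until_end = SZ_1M - (uint32_t)(addr & (SZ_1M - 1));
                /* Clamp to the request, the window end and the chunk end. */
                uint32_t n = min3u(size, until_end,
                                (uint32_t)(chunks[i].length - offset));

                printf("access %u bytes at 0x%llx (chunk %d, offset %u)\n",
                                n, (unsigned long long)addr, i, offset);

                offset += n;
                size -= n;
                if (offset == chunks[i].length) {
                        i++;            /* advance to the next chunk */
                        offset = 0;
                }
        }
}

int main(void)
{
        /* Two discontiguous 64 KiB chunks, as the page allocator might return. */
        struct chunk chunks[] = {
                { .base = 0x100000000ull, .length = 0x10000 },
                { .base = 0x200000000ull, .length = 0x10000 },
        };

        /* 96 KiB request starting 32 KiB into the allocation: spans both chunks. */
        batched_access(chunks, 2, 0x8000, 0x18000);
        return 0;
}

Run as above, the sketch prints one span per chunk, which is exactly the case the
removed single-chunk helper gk20a_mem_get_vidmem_addr() could not handle; the same
loop also serves the single-word gk20a_mem_rd32()/gk20a_mem_wr32() accesses, since
a 4-byte request simply produces one span.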