From e7ba93fefbc4df9663302d240f9fbd5967a75a3c Mon Sep 17 00:00:00 2001 From: Sami Kiminki Date: Mon, 20 Apr 2015 18:12:22 +0300 Subject: gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation Add batch support for mapping and unmapping. Batching essentially helps transform some per-map/unmap overhead to per-batch overhead, namely gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB invalidates. Batching with size 64 has been measured to yield >20x speed-up in low-level fixed-address mapping microbenchmarks. Bug 1614735 Bug 1623949 Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085 Signed-off-by: Sami Kiminki Reviewed-on: http://git-master/r/733231 (cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91) Reviewed-on: http://git-master/r/763812 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c') diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 217f0056..1e247859 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -828,7 +828,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, 0, 0, 0, - args->mapping_size); + args->mapping_size, + NULL); if (err) return err; @@ -839,7 +840,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, virt_addr_hi = u64_hi32(args->offset); /* but check anyway */ if (args->offset + virt_size > SZ_4G) { - gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); + gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL); return -EINVAL; } @@ -881,7 +882,7 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, perf_pmasys_mem_block_valid_false_f() | perf_pmasys_mem_block_target_f(0)); - gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); + gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL); return 0; } -- cgit v1.2.2