summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
diff options
context:
space:
mode:
author: Sami Kiminki <skiminki@nvidia.com> 2015-04-20 11:12:22 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2015-06-30 11:35:23 -0400
commit: e7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch)
tree: e38de3af69153d860d9cb666fb30be262321b198 /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
parent: ae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff)
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially helps
transform some per-map/unmap overhead to per-batch overhead, namely
gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB
invalidates. Batching with size 64 has been measured to yield >20x
speed-up in low-level fixed-address mapping microbenchmarks.

Bug 1614735
Bug 1623949

Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 7
1 files changed, 4 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 217f0056..1e247859 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -828,7 +828,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
828 0, 828 0,
829 0, 829 0,
830 0, 830 0,
831 args->mapping_size); 831 args->mapping_size,
832 NULL);
832 if (err) 833 if (err)
833 return err; 834 return err;
834 835
@@ -839,7 +840,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
839 virt_addr_hi = u64_hi32(args->offset); 840 virt_addr_hi = u64_hi32(args->offset);
840 /* but check anyway */ 841 /* but check anyway */
841 if (args->offset + virt_size > SZ_4G) { 842 if (args->offset + virt_size > SZ_4G) {
842 gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); 843 gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
843 return -EINVAL; 844 return -EINVAL;
844 } 845 }
845 846
@@ -881,7 +882,7 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
881 perf_pmasys_mem_block_valid_false_f() | 882 perf_pmasys_mem_block_valid_false_f() |
882 perf_pmasys_mem_block_target_f(0)); 883 perf_pmasys_mem_block_target_f(0));
883 884
884 gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); 885 gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
885 886
886 return 0; 887 return 0;
887} 888}