diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2015-04-20 11:12:22 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-06-30 11:35:23 -0400 |
commit | e7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch) | |
tree | e38de3af69153d860d9cb666fb30be262321b198 /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |
parent | ae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff) |
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially
helps transform some per-map/unmap overhead to per-batch overhead,
namely gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB
invalidates. Batching with size 64 has been measured to yield >20x
speed-up in low-level fixed-address mapping microbenchmarks.
Bug 1614735
Bug 1623949
Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 7 |
1 files changed, 4 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 217f0056..1e247859 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -828,7 +828,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
828 | 0, | 828 | 0, |
829 | 0, | 829 | 0, |
830 | 0, | 830 | 0, |
831 | args->mapping_size); | 831 | args->mapping_size, |
832 | NULL); | ||
832 | if (err) | 833 | if (err) |
833 | return err; | 834 | return err; |
834 | 835 | ||
@@ -839,7 +840,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
839 | virt_addr_hi = u64_hi32(args->offset); | 840 | virt_addr_hi = u64_hi32(args->offset); |
840 | /* but check anyway */ | 841 | /* but check anyway */ |
841 | if (args->offset + virt_size > SZ_4G) { | 842 | if (args->offset + virt_size > SZ_4G) { |
842 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); | 843 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL); |
843 | return -EINVAL; | 844 | return -EINVAL; |
844 | } | 845 | } |
845 | 846 | ||
@@ -881,7 +882,7 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | |||
881 | perf_pmasys_mem_block_valid_false_f() | | 882 | perf_pmasys_mem_block_valid_false_f() | |
882 | perf_pmasys_mem_block_target_f(0)); | 883 | perf_pmasys_mem_block_target_f(0)); |
883 | 884 | ||
884 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset); | 885 | gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL); |
885 | 886 | ||
886 | return 0; | 887 | return 0; |
887 | } | 888 | } |