author	Sami Kiminki <skiminki@nvidia.com>	2015-04-20 11:12:22 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-06-30 11:35:23 -0400
commit	e7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch)
tree	e38de3af69153d860d9cb666fb30be262321b198 /drivers/gpu/nvgpu/vgpu/mm_vgpu.c
parent	ae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff)
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially helps
transform some per-map/unmap overhead to per-batch overhead, namely
gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB invalidates.
Batching with size 64 has been measured to yield >20x speed-up in
low-level fixed-address mapping microbenchmarks.

Bug 1614735
Bug 1623949

Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
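For illustration, a minimal sketch of the batching idea follows. Only the
struct name vm_gk20a_mapping_batch and the per-batch costs named above
(gk20a_busy()/gk20a_idle(), GPU L2 flush, GPU TLB invalidate) come from
this commit; the field names, the helper names, and the tlb_invalidate()
stand-in are assumptions for the sketch, not the driver's actual API.

    #include <stdbool.h>

    struct vm_gk20a;                      /* opaque stand-in for the real VM */

    /* Per-batch bookkeeping; fields are assumed, not copied from the patch. */
    struct vm_gk20a_mapping_batch {
            bool gpu_l2_flushed;          /* L2 flushed once for this batch */
            bool need_tlb_invalidate;     /* some op deferred a TLB invalidate */
    };

    static void tlb_invalidate(struct vm_gk20a *vm) { /* hypothetical HW op */ }

    static void batch_start(struct vm_gk20a_mapping_batch *b)
    {
            b->gpu_l2_flushed = false;
            b->need_tlb_invalidate = false;
    }

    /* Each unmap records its need instead of invalidating immediately... */
    static void unmap_one(struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *b)
    {
            if (b)
                    b->need_tlb_invalidate = true;  /* batched: defer the work */
            else
                    tlb_invalidate(vm);             /* b == NULL: old behavior */
    }

    /* ...so the caller pays one invalidate per batch rather than one per op. */
    static void batch_finish(struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *b)
    {
            if (b->need_tlb_invalidate)
                    tlb_invalidate(vm);
    }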
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/mm_vgpu.c')
-rw-r--r--	drivers/gpu/nvgpu/vgpu/mm_vgpu.c	8
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 855aac0d..be1fa47d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -66,7 +66,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 				u32 flags,
 				int rw_flag,
 				bool clear_ctags,
-				bool sparse)
+				bool sparse,
+				struct vm_gk20a_mapping_batch *batch)
 {
 	int err = 0;
 	struct device *d = dev_from_vm(vm);
@@ -130,7 +131,8 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
 				int pgsz_idx,
 				bool va_allocated,
 				int rw_flag,
-				bool sparse)
+				bool sparse,
+				struct vm_gk20a_mapping_batch *batch)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
@@ -182,7 +184,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	while (node) {
 		mapped_buffer =
 			container_of(node, struct mapped_buffer_node, node);
-		gk20a_vm_unmap_locked(mapped_buffer);
+		gk20a_vm_unmap_locked(mapped_buffer, NULL);
 		node = rb_first(&vm->mapped_buffers);
 	}
 
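Note the NULL batch in the last hunk: vgpu_vm_remove_support() is tearing
down the whole address space, so it opts out of batching and
gk20a_vm_unmap_locked() presumably falls back to the immediate
flush/invalidate path. A batched caller, by contrast, would look roughly
like the following, reusing the hypothetical batch_start()/batch_finish()
helpers sketched above; vm, num_buffers, and map_one() are placeholders,
not names from this patch.

    struct vm_gk20a_mapping_batch batch;
    size_t i;

    batch_start(&batch);
    for (i = 0; i < num_buffers; i++)
            map_one(vm, i, &batch);   /* each map defers its flush/invalidate */
    batch_finish(vm, &batch);         /* one TLB invalidate for all the maps */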