diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2015-04-20 11:12:22 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-06-30 11:35:23 -0400 |
commit | e7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch) | |
tree | e38de3af69153d860d9cb666fb30be262321b198 /drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |
parent | ae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff) |
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially
helps transform some per-map/unmap overhead to per-batch overhead,
namely gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB
invalidates. Batching with size 64 has been measured to yield >20x
speed-up in low-level fixed-address mapping microbenchmarks.
Bug 1614735
Bug 1623949
Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/mm_vgpu.c')
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 8 |
1 file changed, 5 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 855aac0d..be1fa47d 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -66,7 +66,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
66 | u32 flags, | 66 | u32 flags, |
67 | int rw_flag, | 67 | int rw_flag, |
68 | bool clear_ctags, | 68 | bool clear_ctags, |
69 | bool sparse) | 69 | bool sparse, |
70 | struct vm_gk20a_mapping_batch *batch) | ||
70 | { | 71 | { |
71 | int err = 0; | 72 | int err = 0; |
72 | struct device *d = dev_from_vm(vm); | 73 | struct device *d = dev_from_vm(vm); |
@@ -130,7 +131,8 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
130 | int pgsz_idx, | 131 | int pgsz_idx, |
131 | bool va_allocated, | 132 | bool va_allocated, |
132 | int rw_flag, | 133 | int rw_flag, |
133 | bool sparse) | 134 | bool sparse, |
135 | struct vm_gk20a_mapping_batch *batch) | ||
134 | { | 136 | { |
135 | struct gk20a *g = gk20a_from_vm(vm); | 137 | struct gk20a *g = gk20a_from_vm(vm); |
136 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | 138 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); |
@@ -182,7 +184,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) | |||
182 | while (node) { | 184 | while (node) { |
183 | mapped_buffer = | 185 | mapped_buffer = |
184 | container_of(node, struct mapped_buffer_node, node); | 186 | container_of(node, struct mapped_buffer_node, node); |
185 | gk20a_vm_unmap_locked(mapped_buffer); | 187 | gk20a_vm_unmap_locked(mapped_buffer, NULL); |
186 | node = rb_first(&vm->mapped_buffers); | 188 | node = rb_first(&vm->mapped_buffers); |
187 | } | 189 | } |
188 | 190 | ||