author	Sami Kiminki <skiminki@nvidia.com>	2015-04-20 11:12:22 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-06-30 11:35:23 -0400
commit	e7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch)
tree	e38de3af69153d860d9cb666fb30be262321b198 /drivers/gpu/nvgpu/gk20a/as_gk20a.c
parent	ae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff)
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially
helps transform some per-map/unmap overhead to per-batch overhead,
namely gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB
invalidates. Batching with size 64 has been measured to yield >20x
speed-up in low-level fixed-address mapping microbenchmarks.

Bug 1614735
Bug 1623949

Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
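For context, the new ioctl is meant to be driven from userspace roughly as sketched below: the caller fills struct nvgpu_as_map_buffer_batch_args with pointers to arrays of the existing unmap/map argument structs and issues a single NVGPU_AS_IOCTL_MAP_BUFFER_BATCH call. This is a minimal sketch, not part of the patch; it assumes the nvgpu UAPI header is available as <linux/nvgpu.h>, that the batch struct's maps/unmaps fields are 64-bit user pointers (as the kernel-side casts in this change suggest), and the helper name batch_remap is purely illustrative.

/* Illustrative userspace sketch (assumed header and field layout):
 * issue one batched unmap+map request on an address-space fd. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int batch_remap(int as_fd,
		       struct nvgpu_as_unmap_buffer_args *unmaps,
		       uint32_t num_unmaps,
		       struct nvgpu_as_map_buffer_ex_args *maps,
		       uint32_t num_maps)
{
	struct nvgpu_as_map_buffer_batch_args args;

	memset(&args, 0, sizeof(args));
	args.unmaps = (uintptr_t)unmaps;	/* user pointers passed as u64 */
	args.maps = (uintptr_t)maps;
	args.num_unmaps = num_unmaps;
	args.num_maps = num_maps;

	/* All unmaps are processed first, then all maps, so the per-call
	 * gk20a_busy()/gk20a_idle(), L2 flush, and TLB invalidate overhead
	 * is paid once per batch rather than once per buffer. */
	return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_BATCH, &args);
}

On failure the kernel updates num_unmaps and num_maps to the number of entries that completed (see the handler below), so a caller can report or resume partial progress.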
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/as_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/as_gk20a.c	91
1 file changed, 87 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index eb18fa65..feb22ea8 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -151,8 +151,8 @@ static int gk20a_as_ioctl_map_buffer_ex(
 				   &args->offset, args->flags,
 				   args->kind,
 				   args->buffer_offset,
-				   args->mapping_size
-				   );
+				   args->mapping_size,
+				   NULL);
 }
 
 static int gk20a_as_ioctl_map_buffer(
@@ -163,7 +163,7 @@ static int gk20a_as_ioctl_map_buffer(
 	return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd,
 				   &args->o_a.offset,
 				   args->flags, NV_KIND_DEFAULT,
-				   0, 0);
+				   0, 0, NULL);
 	/* args->o_a.offset will be set if !err */
 }
 
@@ -172,7 +172,86 @@ static int gk20a_as_ioctl_unmap_buffer(
 		struct nvgpu_as_unmap_buffer_args *args)
 {
 	gk20a_dbg_fn("");
-	return gk20a_vm_unmap_buffer(as_share->vm, args->offset);
+	return gk20a_vm_unmap_buffer(as_share->vm, args->offset, NULL);
+}
+
+static int gk20a_as_ioctl_map_buffer_batch(
+	struct gk20a_as_share *as_share,
+	struct nvgpu_as_map_buffer_batch_args *args)
+{
+	struct gk20a *g = as_share->vm->mm->g;
+	u32 i;
+	int err = 0;
+
+	struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
+		(struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
+		args->unmaps;
+	struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
+		(struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
+		args->maps;
+
+	struct vm_gk20a_mapping_batch batch;
+
+	gk20a_dbg_fn("");
+
+	if (args->num_unmaps > g->gpu_characteristics.map_buffer_batch_limit ||
+	    args->num_maps > g->gpu_characteristics.map_buffer_batch_limit)
+		return -EINVAL;
+
+	gk20a_vm_mapping_batch_start(&batch);
+
+	for (i = 0; i < args->num_unmaps; ++i) {
+		struct nvgpu_as_unmap_buffer_args unmap_args;
+
+		if (copy_from_user(&unmap_args, &user_unmap_args[i],
+				   sizeof(unmap_args))) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = gk20a_vm_unmap_buffer(as_share->vm, unmap_args.offset,
+					    &batch);
+		if (err)
+			break;
+	}
+
+	if (err) {
+		gk20a_vm_mapping_batch_finish(as_share->vm, &batch);
+
+		args->num_unmaps = i;
+		args->num_maps = 0;
+		return err;
+	}
+
+	for (i = 0; i < args->num_maps; ++i) {
+		struct nvgpu_as_map_buffer_ex_args map_args;
+		memset(&map_args, 0, sizeof(map_args));
+
+		if (copy_from_user(&map_args, &user_map_args[i],
+				   sizeof(map_args))) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = gk20a_vm_map_buffer(
+			as_share->vm, map_args.dmabuf_fd,
+			&map_args.offset, map_args.flags,
+			map_args.kind,
+			map_args.buffer_offset,
+			map_args.mapping_size,
+			&batch);
+		if (err)
+			break;
+	}
+
+	gk20a_vm_mapping_batch_finish(as_share->vm, &batch);
+
+	if (err)
+		args->num_maps = i;
+	/* note: args->num_unmaps will be unmodified, which is ok
+	 * since all unmaps are done */
+
+	return err;
 }
 
 static int gk20a_as_ioctl_get_va_regions(
@@ -360,6 +439,10 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = gk20a_as_ioctl_map_buffer_compbits(as_share,
 				(struct nvgpu_as_map_buffer_compbits_args *)buf);
 		break;
+	case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
+		err = gk20a_as_ioctl_map_buffer_batch(as_share,
+				(struct nvgpu_as_map_buffer_batch_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
 		err = -ENOTTY;