From e7ba93fefbc4df9663302d240f9fbd5967a75a3c Mon Sep 17 00:00:00 2001 From: Sami Kiminki Date: Mon, 20 Apr 2015 18:12:22 +0300 Subject: gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation Add batch support for mapping and unmapping. Batching essentially helps transform some per-map/unmap overhead to per-batch overhead, namely gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB invalidates. Batching with size 64 has been measured to yield >20x speed-up in low-level fixed-address mapping microbenchmarks. Bug 1614735 Bug 1623949 Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085 Signed-off-by: Sami Kiminki Reviewed-on: http://git-master/r/733231 (cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91) Reviewed-on: http://git-master/r/763812 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/as_gk20a.c | 91 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/as_gk20a.c') diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index eb18fa65..feb22ea8 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c @@ -151,8 +151,8 @@ static int gk20a_as_ioctl_map_buffer_ex( &args->offset, args->flags, args->kind, args->buffer_offset, - args->mapping_size - ); + args->mapping_size, + NULL); } static int gk20a_as_ioctl_map_buffer( @@ -163,7 +163,7 @@ static int gk20a_as_ioctl_map_buffer( return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd, &args->o_a.offset, args->flags, NV_KIND_DEFAULT, - 0, 0); + 0, 0, NULL); /* args->o_a.offset will be set if !err */ } @@ -172,7 +172,86 @@ static int gk20a_as_ioctl_unmap_buffer( struct nvgpu_as_unmap_buffer_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_unmap_buffer(as_share->vm, args->offset); + return gk20a_vm_unmap_buffer(as_share->vm, args->offset, NULL); +} + +static int gk20a_as_ioctl_map_buffer_batch( + struct gk20a_as_share *as_share, + 
struct nvgpu_as_map_buffer_batch_args *args) +{ /* Batch ioctl handler: runs args->num_unmaps unmaps, then args->num_maps maps, all through one vm_gk20a_mapping_batch. Returns 0 or the first error; on error the affected count in args is overwritten with the number of entries processed before the failure. */ + struct gk20a *g = as_share->vm->mm->g; + u32 i; + int err = 0; + + struct nvgpu_as_unmap_buffer_args __user *user_unmap_args = + (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t) + args->unmaps; + struct nvgpu_as_map_buffer_ex_args __user *user_map_args = + (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t) + args->maps; /* args->unmaps and args->maps hold user-space array addresses; they are cast through uintptr_t and only read via copy_from_user() */ + + struct vm_gk20a_mapping_batch batch; + + gk20a_dbg_fn(""); + + if (args->num_unmaps > g->gpu_characteristics.map_buffer_batch_limit || + args->num_maps > g->gpu_characteristics.map_buffer_batch_limit) + return -EINVAL; /* both counts are bounded by map_buffer_batch_limit before any work is done */ + + gk20a_vm_mapping_batch_start(&batch); /* NOTE(review): helper bodies are not visible here; per the commit message the batch moves gk20a_busy()/gk20a_idle(), L2 flush and TLB invalidate cost from per-operation to per-batch - confirm */ + + for (i = 0; i < args->num_unmaps; ++i) { + struct nvgpu_as_unmap_buffer_args unmap_args; + + if (copy_from_user(&unmap_args, &user_unmap_args[i], + sizeof(unmap_args))) { + err = -EFAULT; + break; + } + + err = gk20a_vm_unmap_buffer(as_share->vm, unmap_args.offset, + &batch); + if (err) + break; + } + + if (err) { + gk20a_vm_mapping_batch_finish(as_share->vm, &batch); + + args->num_unmaps = i; + args->num_maps = 0; /* i unmaps succeeded; the map phase is skipped entirely */ + return err; + } + + for (i = 0; i < args->num_maps; ++i) { + struct nvgpu_as_map_buffer_ex_args map_args; + memset(&map_args, 0, sizeof(map_args)); + + if (copy_from_user(&map_args, &user_map_args[i], + sizeof(map_args))) { + err = -EFAULT; + break; + } + + err = gk20a_vm_map_buffer( + as_share->vm, map_args.dmabuf_fd, + &map_args.offset, map_args.flags, + map_args.kind, + map_args.buffer_offset, + map_args.mapping_size, + &batch); /* NOTE(review): map_args is a kernel-local copy that is never copied back, so an offset written by gk20a_vm_map_buffer() does not reach user space from here - confirm this is intended */ + if (err) + break; + } + + gk20a_vm_mapping_batch_finish(as_share->vm, &batch); /* finish runs whether or not the map loop failed */ + + if (err) + args->num_maps = i; + /* note: args->num_unmaps will be unmodified, which is ok + * since all unmaps are done */ + + return err; } static int gk20a_as_ioctl_get_va_regions( @@ -360,6 +439,10 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err = gk20a_as_ioctl_map_buffer_compbits(as_share, (struct nvgpu_as_map_buffer_compbits_args *)buf); break; + case 
NVGPU_AS_IOCTL_MAP_BUFFER_BATCH: + err = gk20a_as_ioctl_map_buffer_batch(as_share, + (struct nvgpu_as_map_buffer_batch_args *)buf); + break; default: dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd); err = -ENOTTY; -- cgit v1.2.2