From e7ba93fefbc4df9663302d240f9fbd5967a75a3c Mon Sep 17 00:00:00 2001
From: Sami Kiminki
Date: Mon, 20 Apr 2015 18:12:22 +0300
Subject: gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation

Add batch support for mapping and unmapping. Batching essentially
helps transform some per-map/unmap overhead to per-batch overhead,
namely gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB
invalidates. Batching with size 64 has been measured to yield >20x
speed-up in low-level fixed-address mapping microbenchmarks.

Bug 1614735
Bug 1623949

Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 include/uapi/linux/nvgpu.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index f7b68380..c4edd305 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -172,7 +172,10 @@ struct nvgpu_gpu_characteristics {
 	__u32 lts_per_ltc;
 	__u32 cbc_cache_line_size;
 	__u32 cbc_comptags_per_line;
-	__u32 reserved2;
+
+	/* MAP_BUFFER_BATCH: the upper limit for num_unmaps and
+	 * num_maps */
+	__u32 map_buffer_batch_limit;
 
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
@@ -1031,6 +1034,16 @@ struct nvgpu_as_get_va_regions_args {
 	__u32 reserved;
 };
 
+struct nvgpu_as_map_buffer_batch_args {
+	__u64 unmaps; /* ptr to array of nvgpu_unmap_buffer_args */
+	__u64 maps;   /* ptr to array of nvgpu_as_map_buffer_ex_args */
+	__u32 num_unmaps; /* in: number of unmaps
+			   * out: on error, number of successful unmaps */
+	__u32 num_maps;   /* in: number of maps
+			   * out: on error, number of successful maps */
+	__u64 reserved;
+};
+
 #define NVGPU_AS_IOCTL_BIND_CHANNEL \
 	_IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args)
 #define NVGPU32_AS_IOCTL_ALLOC_SPACE \
@@ -1051,9 +1064,11 @@ struct nvgpu_as_get_va_regions_args {
 	_IOWR(NVGPU_AS_IOCTL_MAGIC, 9, struct nvgpu_as_get_buffer_compbits_info_args)
 #define NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS \
 	_IOWR(NVGPU_AS_IOCTL_MAGIC, 10, struct nvgpu_as_map_buffer_compbits_args)
+#define NVGPU_AS_IOCTL_MAP_BUFFER_BATCH \
+	_IOWR(NVGPU_AS_IOCTL_MAGIC, 11, struct nvgpu_as_map_buffer_batch_args)
 
 #define NVGPU_AS_IOCTL_LAST \
-	_IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS)
+	_IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_BATCH)
 #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_as_map_buffer_ex_args)
 
-- 
cgit v1.2.2