summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
authorSami Kiminki <skiminki@nvidia.com>2015-04-20 11:12:22 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2015-06-30 11:35:23 -0400
commite7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch)
treee38de3af69153d860d9cb666fb30be262321b198 /include
parentae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff)
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially helps transform some per-map/unmap overhead to per-batch overhead, namely gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB invalidates. Batching with size 64 has been measured to yield >20x speed-up in low-level fixed-address mapping microbenchmarks. Bug 1614735 Bug 1623949 Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085 Signed-off-by: Sami Kiminki <skiminki@nvidia.com> Reviewed-on: http://git-master/r/733231 (cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91) Reviewed-on: http://git-master/r/763812 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'include')
-rw-r--r--include/uapi/linux/nvgpu.h19
1 file changed, 17 insertions, 2 deletions
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index f7b68380..c4edd305 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -172,7 +172,10 @@ struct nvgpu_gpu_characteristics {
172 __u32 lts_per_ltc; 172 __u32 lts_per_ltc;
173 __u32 cbc_cache_line_size; 173 __u32 cbc_cache_line_size;
174 __u32 cbc_comptags_per_line; 174 __u32 cbc_comptags_per_line;
175 __u32 reserved2; 175
176 /* MAP_BUFFER_BATCH: the upper limit for num_unmaps and
177 * num_maps */
178 __u32 map_buffer_batch_limit;
176 179
177 /* Notes: 180 /* Notes:
178 - This struct can be safely appended with new fields. However, always 181 - This struct can be safely appended with new fields. However, always
@@ -1031,6 +1034,16 @@ struct nvgpu_as_get_va_regions_args {
1031 __u32 reserved; 1034 __u32 reserved;
1032}; 1035};
1033 1036
1037struct nvgpu_as_map_buffer_batch_args {
1038 __u64 unmaps; /* ptr to array of nvgpu_unmap_buffer_args */
1039 __u64 maps; /* ptr to array of nvgpu_as_map_buffer_ex_args */
1040 __u32 num_unmaps; /* in: number of unmaps
1041 * out: on error, number of successful unmaps */
1042 __u32 num_maps; /* in: number of maps
1043 * out: on error, number of successful maps */
1044 __u64 reserved;
1045};
1046
1034#define NVGPU_AS_IOCTL_BIND_CHANNEL \ 1047#define NVGPU_AS_IOCTL_BIND_CHANNEL \
1035 _IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args) 1048 _IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args)
1036#define NVGPU32_AS_IOCTL_ALLOC_SPACE \ 1049#define NVGPU32_AS_IOCTL_ALLOC_SPACE \
@@ -1051,9 +1064,11 @@ struct nvgpu_as_get_va_regions_args {
1051 _IOWR(NVGPU_AS_IOCTL_MAGIC, 9, struct nvgpu_as_get_buffer_compbits_info_args) 1064 _IOWR(NVGPU_AS_IOCTL_MAGIC, 9, struct nvgpu_as_get_buffer_compbits_info_args)
1052#define NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS \ 1065#define NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS \
1053 _IOWR(NVGPU_AS_IOCTL_MAGIC, 10, struct nvgpu_as_map_buffer_compbits_args) 1066 _IOWR(NVGPU_AS_IOCTL_MAGIC, 10, struct nvgpu_as_map_buffer_compbits_args)
1067#define NVGPU_AS_IOCTL_MAP_BUFFER_BATCH \
1068 _IOWR(NVGPU_AS_IOCTL_MAGIC, 11, struct nvgpu_as_map_buffer_batch_args)
1054 1069
1055#define NVGPU_AS_IOCTL_LAST \ 1070#define NVGPU_AS_IOCTL_LAST \
1056 _IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS) 1071 _IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_BATCH)
1057#define NVGPU_AS_IOCTL_MAX_ARG_SIZE \ 1072#define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
1058 sizeof(struct nvgpu_as_map_buffer_ex_args) 1073 sizeof(struct nvgpu_as_map_buffer_ex_args)
1059 1074