diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2015-04-20 11:12:22 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-06-30 11:35:23 -0400 |
commit | e7ba93fefbc4df9663302d240f9fbd5967a75a3c (patch) | |
tree | e38de3af69153d860d9cb666fb30be262321b198 /include/uapi/linux/nvgpu.h | |
parent | ae7b988b0d8767cfbc2cffe4c7ec8757e4dd94a6 (diff) |
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching amortizes some
per-map/unmap overhead into per-batch overhead, namely the
gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB
invalidates. Batching with a batch size of 64 has been measured to yield
a >20x speed-up in low-level fixed-address mapping microbenchmarks.
Bug 1614735
Bug 1623949
Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'include/uapi/linux/nvgpu.h')
-rw-r--r-- | include/uapi/linux/nvgpu.h | 19 |
1 file changed, 17 insertions, 2 deletions
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index f7b68380..c4edd305 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -172,7 +172,10 @@ struct nvgpu_gpu_characteristics { | |||
172 | __u32 lts_per_ltc; | 172 | __u32 lts_per_ltc; |
173 | __u32 cbc_cache_line_size; | 173 | __u32 cbc_cache_line_size; |
174 | __u32 cbc_comptags_per_line; | 174 | __u32 cbc_comptags_per_line; |
175 | __u32 reserved2; | 175 | |
176 | /* MAP_BUFFER_BATCH: the upper limit for num_unmaps and | ||
177 | * num_maps */ | ||
178 | __u32 map_buffer_batch_limit; | ||
176 | 179 | ||
177 | /* Notes: | 180 | /* Notes: |
178 | - This struct can be safely appended with new fields. However, always | 181 | - This struct can be safely appended with new fields. However, always |
@@ -1031,6 +1034,16 @@ struct nvgpu_as_get_va_regions_args { | |||
1031 | __u32 reserved; | 1034 | __u32 reserved; |
1032 | }; | 1035 | }; |
1033 | 1036 | ||
1037 | struct nvgpu_as_map_buffer_batch_args { | ||
1038 | __u64 unmaps; /* ptr to array of nvgpu_unmap_buffer_args */ | ||
1039 | __u64 maps; /* ptr to array of nvgpu_as_map_buffer_ex_args */ | ||
1040 | __u32 num_unmaps; /* in: number of unmaps | ||
1041 | * out: on error, number of successful unmaps */ | ||
1042 | __u32 num_maps; /* in: number of maps | ||
1043 | * out: on error, number of successful maps */ | ||
1044 | __u64 reserved; | ||
1045 | }; | ||
1046 | |||
1034 | #define NVGPU_AS_IOCTL_BIND_CHANNEL \ | 1047 | #define NVGPU_AS_IOCTL_BIND_CHANNEL \ |
1035 | _IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args) | 1048 | _IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args) |
1036 | #define NVGPU32_AS_IOCTL_ALLOC_SPACE \ | 1049 | #define NVGPU32_AS_IOCTL_ALLOC_SPACE \ |
@@ -1051,9 +1064,11 @@ struct nvgpu_as_get_va_regions_args { | |||
1051 | _IOWR(NVGPU_AS_IOCTL_MAGIC, 9, struct nvgpu_as_get_buffer_compbits_info_args) | 1064 | _IOWR(NVGPU_AS_IOCTL_MAGIC, 9, struct nvgpu_as_get_buffer_compbits_info_args) |
1052 | #define NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS \ | 1065 | #define NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS \ |
1053 | _IOWR(NVGPU_AS_IOCTL_MAGIC, 10, struct nvgpu_as_map_buffer_compbits_args) | 1066 | _IOWR(NVGPU_AS_IOCTL_MAGIC, 10, struct nvgpu_as_map_buffer_compbits_args) |
1067 | #define NVGPU_AS_IOCTL_MAP_BUFFER_BATCH \ | ||
1068 | _IOWR(NVGPU_AS_IOCTL_MAGIC, 11, struct nvgpu_as_map_buffer_batch_args) | ||
1054 | 1069 | ||
1055 | #define NVGPU_AS_IOCTL_LAST \ | 1070 | #define NVGPU_AS_IOCTL_LAST \ |
1056 | _IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS) | 1071 | _IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_BATCH) |
1057 | #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \ | 1072 | #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \ |
1058 | sizeof(struct nvgpu_as_map_buffer_ex_args) | 1073 | sizeof(struct nvgpu_as_map_buffer_ex_args) |
1059 | 1074 | ||