diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-06-05 15:53:16 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-06-22 13:04:16 -0400 |
commit | 840e039d57d4acfb2be2a82c4b95a6d25c7aacd4 (patch) | |
tree | a4a358cf6bb5d005e8f6db159d1b3b03ddbf94df | |
parent | 46666ed101847d9b87ea60cd432dea97afbef0b1 (diff) |
gpu: nvgpu: Update Linux side VM code for API solidification
Update the Linux-specific code to match the MM API docs in the
previous patch. The user-passed page size is plumbed through
the Linux VM mapping calls but is ultimately ignored once the
core VM code is called. This will be handled in the next
patch.
This also adds some code to pick the CDE page size
semi-intelligently. In many cases the CDE buffers can
be mapped with large pages.
Bug 2011640
Change-Id: I20e78e7d5a841e410864b474179e71da1c2482f4
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1740610
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/linux/vm.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/cde.c | 29 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_as.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/vm.c | 39 |
5 files changed, 65 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h index 97b8334b..6f3beaa9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h | |||
@@ -54,8 +54,9 @@ struct nvgpu_mapped_buf_priv { | |||
54 | /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */ | 54 | /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */ |
55 | int nvgpu_vm_map_linux(struct vm_gk20a *vm, | 55 | int nvgpu_vm_map_linux(struct vm_gk20a *vm, |
56 | struct dma_buf *dmabuf, | 56 | struct dma_buf *dmabuf, |
57 | u64 offset_align, | 57 | u64 map_addr, |
58 | u32 flags, | 58 | u32 flags, |
59 | u32 page_size, | ||
59 | s16 compr_kind, | 60 | s16 compr_kind, |
60 | s16 incompr_kind, | 61 | s16 incompr_kind, |
61 | int rw_flag, | 62 | int rw_flag, |
@@ -71,8 +72,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, | |||
71 | */ | 72 | */ |
72 | int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | 73 | int nvgpu_vm_map_buffer(struct vm_gk20a *vm, |
73 | int dmabuf_fd, | 74 | int dmabuf_fd, |
74 | u64 *offset_align, | 75 | u64 *map_addr, |
75 | u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ | 76 | u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ |
77 | u32 page_size, | ||
76 | s16 compr_kind, | 78 | s16 compr_kind, |
77 | s16 incompr_kind, | 79 | s16 incompr_kind, |
78 | u64 buffer_offset, | 80 | u64 buffer_offset, |
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index 66a80403..052a1d21 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c | |||
@@ -975,6 +975,30 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l | |||
975 | return cde_ctx; | 975 | return cde_ctx; |
976 | } | 976 | } |
977 | 977 | ||
978 | static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm, | ||
979 | u32 map_offset, u32 map_size) | ||
980 | { | ||
981 | struct gk20a *g = gk20a_from_vm(vm); | ||
982 | |||
983 | /* | ||
984 | * To be simple we will just make the map size depend on the | ||
985 | * iommu'ability of the driver. If there's an IOMMU we can rely on | ||
986 | * buffers being contiguous. If not, then we'll use 4k pages since we | ||
987 | * know that will work for any buffer. | ||
988 | */ | ||
989 | if (!nvgpu_iommuable(g)) | ||
990 | return SZ_4K; | ||
991 | |||
992 | /* | ||
993 | * If map size or offset is not 64K aligned then use small pages. | ||
994 | */ | ||
995 | if (map_size & (vm->big_page_size - 1) || | ||
996 | map_offset & (vm->big_page_size - 1)) | ||
997 | return SZ_4K; | ||
998 | |||
999 | return vm->big_page_size; | ||
1000 | } | ||
1001 | |||
978 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | 1002 | int gk20a_cde_convert(struct nvgpu_os_linux *l, |
979 | struct dma_buf *compbits_scatter_buf, | 1003 | struct dma_buf *compbits_scatter_buf, |
980 | u64 compbits_byte_offset, | 1004 | u64 compbits_byte_offset, |
@@ -1071,7 +1095,10 @@ __releases(&l->cde_app->mutex) | |||
1071 | err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0, | 1095 | err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0, |
1072 | NVGPU_VM_MAP_CACHEABLE | | 1096 | NVGPU_VM_MAP_CACHEABLE | |
1073 | NVGPU_VM_MAP_DIRECT_KIND_CTRL, | 1097 | NVGPU_VM_MAP_DIRECT_KIND_CTRL, |
1074 | NVGPU_KIND_INVALID, | 1098 | gk20a_cde_mapping_page_size(cde_ctx->vm, |
1099 | map_offset, | ||
1100 | map_size), | ||
1101 | NV_KIND_INVALID, | ||
1075 | compbits_kind, /* incompressible kind */ | 1102 | compbits_kind, /* incompressible kind */ |
1076 | gk20a_mem_flag_none, | 1103 | gk20a_mem_flag_none, |
1077 | map_offset, map_size, | 1104 | map_offset, map_size, |
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c index 47f612cc..5eb9802f 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c | |||
@@ -111,6 +111,7 @@ static int gk20a_as_ioctl_map_buffer_ex( | |||
111 | 111 | ||
112 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, | 112 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, |
113 | &args->offset, args->flags, | 113 | &args->offset, args->flags, |
114 | args->page_size, | ||
114 | args->compr_kind, | 115 | args->compr_kind, |
115 | args->incompr_kind, | 116 | args->incompr_kind, |
116 | args->buffer_offset, | 117 | args->buffer_offset, |
@@ -201,7 +202,7 @@ static int gk20a_as_ioctl_map_buffer_batch( | |||
201 | 202 | ||
202 | err = nvgpu_vm_map_buffer( | 203 | err = nvgpu_vm_map_buffer( |
203 | as_share->vm, map_args.dmabuf_fd, | 204 | as_share->vm, map_args.dmabuf_fd, |
204 | &map_args.offset, map_args.flags, | 205 | &map_args.offset, map_args.flags, map_args.page_size, |
205 | compressible_kind, incompressible_kind, | 206 | compressible_kind, incompressible_kind, |
206 | map_args.buffer_offset, | 207 | map_args.buffer_offset, |
207 | map_args.mapping_size, | 208 | map_args.mapping_size, |
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index 76ff25c0..938e0abd 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | |||
@@ -1372,10 +1372,11 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
1372 | args->dmabuf_fd, | 1372 | args->dmabuf_fd, |
1373 | &args->offset, | 1373 | &args->offset, |
1374 | 0, | 1374 | 0, |
1375 | SZ_4K, | ||
1376 | 0, | ||
1375 | 0, | 1377 | 0, |
1376 | 0, | 1378 | 0, |
1377 | 0, | 1379 | 0, |
1378 | args->mapping_size, | ||
1379 | NULL); | 1380 | NULL); |
1380 | if (err) | 1381 | if (err) |
1381 | goto err_remove_vm; | 1382 | goto err_remove_vm; |
diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c index baa77515..eb9ca8fd 100644 --- a/drivers/gpu/nvgpu/os/linux/vm.c +++ b/drivers/gpu/nvgpu/os/linux/vm.c | |||
@@ -175,8 +175,9 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
175 | 175 | ||
176 | int nvgpu_vm_map_linux(struct vm_gk20a *vm, | 176 | int nvgpu_vm_map_linux(struct vm_gk20a *vm, |
177 | struct dma_buf *dmabuf, | 177 | struct dma_buf *dmabuf, |
178 | u64 offset_align, | 178 | u64 map_addr, |
179 | u32 flags, | 179 | u32 flags, |
180 | u32 page_size, | ||
180 | s16 compr_kind, | 181 | s16 compr_kind, |
181 | s16 incompr_kind, | 182 | s16 incompr_kind, |
182 | int rw_flag, | 183 | int rw_flag, |
@@ -192,12 +193,8 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, | |||
192 | struct nvgpu_sgt *nvgpu_sgt = NULL; | 193 | struct nvgpu_sgt *nvgpu_sgt = NULL; |
193 | struct nvgpu_mapped_buf *mapped_buffer = NULL; | 194 | struct nvgpu_mapped_buf *mapped_buffer = NULL; |
194 | struct dma_buf_attachment *attachment; | 195 | struct dma_buf_attachment *attachment; |
195 | u64 map_addr = 0ULL; | ||
196 | int err = 0; | 196 | int err = 0; |
197 | 197 | ||
198 | if (flags & NVGPU_VM_MAP_FIXED_OFFSET) | ||
199 | map_addr = offset_align; | ||
200 | |||
201 | sgt = gk20a_mm_pin(dev, dmabuf, &attachment); | 198 | sgt = gk20a_mm_pin(dev, dmabuf, &attachment); |
202 | if (IS_ERR(sgt)) { | 199 | if (IS_ERR(sgt)) { |
203 | nvgpu_warn(g, "Failed to pin dma_buf!"); | 200 | nvgpu_warn(g, "Failed to pin dma_buf!"); |
@@ -253,8 +250,9 @@ clean_up: | |||
253 | 250 | ||
254 | int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | 251 | int nvgpu_vm_map_buffer(struct vm_gk20a *vm, |
255 | int dmabuf_fd, | 252 | int dmabuf_fd, |
256 | u64 *offset_align, | 253 | u64 *map_addr, |
257 | u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ | 254 | u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ |
255 | u32 page_size, | ||
258 | s16 compr_kind, | 256 | s16 compr_kind, |
259 | s16 incompr_kind, | 257 | s16 incompr_kind, |
260 | u64 buffer_offset, | 258 | u64 buffer_offset, |
@@ -274,8 +272,28 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | |||
274 | return PTR_ERR(dmabuf); | 272 | return PTR_ERR(dmabuf); |
275 | } | 273 | } |
276 | 274 | ||
275 | /* | ||
276 | * For regular maps we do not accept either an input address or a | ||
277 | * buffer_offset. | ||
278 | */ | ||
279 | if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) && | ||
280 | (buffer_offset || *map_addr)) { | ||
281 | nvgpu_err(g, | ||
282 | "Regular map with addr/buf offset is not supported!"); | ||
283 | return -EINVAL; | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * Map size is always buffer size for non fixed mappings. As such map | ||
288 | * size should be left as zero by userspace for non-fixed maps. | ||
289 | */ | ||
290 | if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { | ||
291 | nvgpu_err(g, "map_size && non-fixed-mapping!"); | ||
292 | return -EINVAL; | ||
293 | } | ||
294 | |||
277 | /* verify that we're not overflowing the buffer, i.e. | 295 | /* verify that we're not overflowing the buffer, i.e. |
278 | * (buffer_offset + mapping_size)> dmabuf->size. | 296 | * (buffer_offset + mapping_size) > dmabuf->size. |
279 | * | 297 | * |
280 | * Since buffer_offset + mapping_size could overflow, first check | 298 | * Since buffer_offset + mapping_size could overflow, first check |
281 | * that mapping size < dmabuf_size, at which point we can subtract | 299 | * that mapping size < dmabuf_size, at which point we can subtract |
@@ -284,7 +302,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | |||
284 | if ((mapping_size > dmabuf->size) || | 302 | if ((mapping_size > dmabuf->size) || |
285 | (buffer_offset > (dmabuf->size - mapping_size))) { | 303 | (buffer_offset > (dmabuf->size - mapping_size))) { |
286 | nvgpu_err(g, | 304 | nvgpu_err(g, |
287 | "buf size %llx < (offset(%llx) + map_size(%llx))\n", | 305 | "buf size %llx < (offset(%llx) + map_size(%llx))", |
288 | (u64)dmabuf->size, buffer_offset, mapping_size); | 306 | (u64)dmabuf->size, buffer_offset, mapping_size); |
289 | dma_buf_put(dmabuf); | 307 | dma_buf_put(dmabuf); |
290 | return -EINVAL; | 308 | return -EINVAL; |
@@ -296,8 +314,9 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | |||
296 | return err; | 314 | return err; |
297 | } | 315 | } |
298 | 316 | ||
299 | err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align, | 317 | err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr, |
300 | nvgpu_vm_translate_linux_flags(g, flags), | 318 | nvgpu_vm_translate_linux_flags(g, flags), |
319 | page_size, | ||
301 | compr_kind, incompr_kind, | 320 | compr_kind, incompr_kind, |
302 | gk20a_mem_flag_none, | 321 | gk20a_mem_flag_none, |
303 | buffer_offset, | 322 | buffer_offset, |
@@ -306,7 +325,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm, | |||
306 | &ret_va); | 325 | &ret_va); |
307 | 326 | ||
308 | if (!err) | 327 | if (!err) |
309 | *offset_align = ret_va; | 328 | *map_addr = ret_va; |
310 | else | 329 | else |
311 | dma_buf_put(dmabuf); | 330 | dma_buf_put(dmabuf); |
312 | 331 | ||