From 840e039d57d4acfb2be2a82c4b95a6d25c7aacd4 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Tue, 5 Jun 2018 20:53:16 +0100
Subject: gpu: nvgpu: Update Linux side VM code for API solidification

Update the Linux specific code to match the MM API docs in the
previous patch. The user passed page size is plumbed through
the Linux VM mapping calls but is ultimately ignored once the
core VM code is called. This will be handled in the next
patch.

This also adds some code to make the CDE page size picking
happen semi-intelligently. In many cases the CDE buffers can
be mapped with large pages.

Bug 2011640

Change-Id: I20e78e7d5a841e410864b474179e71da1c2482f4
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1740610
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/include/nvgpu/linux/vm.h |  6 +++--
 drivers/gpu/nvgpu/os/linux/cde.c           | 29 +++++++++++++++++++++-
 drivers/gpu/nvgpu/os/linux/ioctl_as.c      |  3 ++-
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c     |  3 ++-
 drivers/gpu/nvgpu/os/linux/vm.c            | 39 ++++++++++++++++++++++--------
 5 files changed, 65 insertions(+), 15 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
index 97b8334b..6f3beaa9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
@@ -54,8 +54,9 @@ struct nvgpu_mapped_buf_priv {
 /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		       struct dma_buf *dmabuf,
-		       u64 offset_align,
+		       u64 map_addr,
 		       u32 flags,
+		       u32 page_size,
 		       s16 compr_kind,
 		       s16 incompr_kind,
 		       int rw_flag,
@@ -71,8 +72,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
  */
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
-			u64 *offset_align,
+			u64 *map_addr,
 			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
+			u32 page_size,
 			s16 compr_kind,
 			s16 incompr_kind,
 			u64 buffer_offset,
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index 66a80403..052a1d21 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -975,6 +975,30 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l
 	return cde_ctx;
 }
 
+static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm,
+				       u32 map_offset, u32 map_size)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	/*
+	 * Pick the GPU page size for a CDE mapping. To keep it simple, the
+	 * choice depends on IOMMU availability: with an IOMMU we can rely on
+	 * buffers being contiguous; without one we use 4k pages since we
+	 * know that will work for any buffer.
+	 */
+	if (!nvgpu_iommuable(g))
+		return SZ_4K;
+
+	/*
+	 * If map size or offset is not big-page aligned then use small pages.
+	 */
+	if (map_size & (vm->big_page_size - 1) ||
+	    map_offset & (vm->big_page_size - 1))
+		return SZ_4K;
+
+	return vm->big_page_size;
+}
+
 int gk20a_cde_convert(struct nvgpu_os_linux *l,
 		      struct dma_buf *compbits_scatter_buf,
 		      u64 compbits_byte_offset,
@@ -1071,7 +1095,10 @@ __releases(&l->cde_app->mutex)
 	err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
 				 NVGPU_VM_MAP_CACHEABLE |
 				 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
-				 NVGPU_KIND_INVALID,
+				 gk20a_cde_mapping_page_size(cde_ctx->vm,
+							     map_offset,
+							     map_size),
+				 NV_KIND_INVALID,
 				 compbits_kind, /* incompressible kind */
 				 gk20a_mem_flag_none,
 				 map_offset, map_size,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 47f612cc..5eb9802f 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -111,6 +111,7 @@ static int gk20a_as_ioctl_map_buffer_ex(
 
 	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
 				   &args->offset, args->flags,
+				   args->page_size,
 				   args->compr_kind,
 				   args->incompr_kind,
 				   args->buffer_offset,
@@ -201,7 +202,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 
 		err = nvgpu_vm_map_buffer(
 			as_share->vm, map_args.dmabuf_fd,
-			&map_args.offset, map_args.flags,
+			&map_args.offset, map_args.flags, map_args.page_size,
 			compressible_kind, incompressible_kind,
 			map_args.buffer_offset,
 			map_args.mapping_size,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 76ff25c0..938e0abd 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1372,10 +1372,11 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 			args->dmabuf_fd,
 			&args->offset,
 			0,
+			SZ_4K,
+			0,
 			0,
 			0,
 			0,
-			args->mapping_size,
 			NULL);
 	if (err)
 		goto err_remove_vm;
diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c
index baa77515..eb9ca8fd 100644
--- a/drivers/gpu/nvgpu/os/linux/vm.c
+++ b/drivers/gpu/nvgpu/os/linux/vm.c
@@ -175,8 +175,9 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		       struct dma_buf *dmabuf,
-		       u64 offset_align,
+		       u64 map_addr,
 		       u32 flags,
+		       u32 page_size,
 		       s16 compr_kind,
 		       s16 incompr_kind,
 		       int rw_flag,
@@ -192,12 +193,8 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	struct nvgpu_sgt *nvgpu_sgt = NULL;
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct dma_buf_attachment *attachment;
-	u64 map_addr = 0ULL;
 	int err = 0;
 
-	if (flags & NVGPU_VM_MAP_FIXED_OFFSET)
-		map_addr = offset_align;
-
 	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
 	if (IS_ERR(sgt)) {
 		nvgpu_warn(g, "Failed to pin dma_buf!");
@@ -253,8 +250,9 @@ clean_up:
 
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
-			u64 *offset_align,
+			u64 *map_addr,
 			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
+			u32 page_size,
 			s16 compr_kind,
 			s16 incompr_kind,
 			u64 buffer_offset,
@@ -274,8 +272,28 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 		return PTR_ERR(dmabuf);
 	}
 
+	/*
+	 * Regular maps accept neither an input address nor a buffer_offset.
+	 */
+	if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) &&
+	    (buffer_offset || *map_addr)) {
+		nvgpu_err(g,
+			  "Regular map with addr/buf offset is not supported!");
+		dma_buf_put(dmabuf); /* drop our dmabuf reference */
+		return -EINVAL;
+	}
+
+	/*
+	 * Non-fixed maps use the buffer size; userspace must pass map size 0.
+	 */
+	if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		nvgpu_err(g, "map_size && non-fixed-mapping!");
+		dma_buf_put(dmabuf); /* drop our dmabuf reference */
+		return -EINVAL;
+	}
+
 	/* verify that we're not overflowing the buffer, i.e.
-	 * (buffer_offset + mapping_size)> dmabuf->size.
+	 * (buffer_offset + mapping_size) > dmabuf->size.
 	 *
 	 * Since buffer_offset + mapping_size could overflow, first check
 	 * that mapping size < dmabuf_size, at which point we can subtract
@@ -284,7 +302,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 	if ((mapping_size > dmabuf->size) ||
 			(buffer_offset > (dmabuf->size - mapping_size))) {
 		nvgpu_err(g,
-			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
+			  "buf size %llx < (offset(%llx) + map_size(%llx))",
 			  (u64)dmabuf->size, buffer_offset, mapping_size);
 		dma_buf_put(dmabuf);
 		return -EINVAL;
@@ -296,8 +314,9 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 		return err;
 	}
 
-	err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align,
+	err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr,
 				 nvgpu_vm_translate_linux_flags(g, flags),
+				 page_size,
 				 compr_kind, incompr_kind,
 				 gk20a_mem_flag_none,
 				 buffer_offset,
@@ -306,7 +325,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 				 &ret_va);
 
 	if (!err)
-		*offset_align = ret_va;
+		*map_addr = ret_va;
 	else
 		dma_buf_put(dmabuf);
 
-- 
cgit v1.2.2