From 9d2c9072c8b9a7742db3974d6027df9d44e0953f Mon Sep 17 00:00:00 2001
From: Sami Kiminki
Date: Mon, 4 May 2015 18:41:23 +0300
Subject: gpu: nvgpu: User-space managed address space support

Implement NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED, which
enables creating userspace-managed GPU address spaces.

When an address space is marked as userspace-managed, the following
changes are in effect:

- Only fixed-address mappings are allowed.
- VA space allocation for fixed-address mappings is not required,
  except to mark space as sparse.
- Maps and unmaps are always immediate. In particular, the mapping
  ref increments at kickoffs and decrements at job completion are
  skipped.

Bug 1614735
Bug 1623949
Bug 1660392

Change-Id: I834fe19b3f65e9b02c268952383eddee0e465759
Signed-off-by: Sami Kiminki
Reviewed-on: http://git-master/r/738558
Reviewed-on: http://git-master/r/833253
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/as_gk20a.c   |   7 +-
 drivers/gpu/nvgpu/gk20a/as_gk20a.h   |   6 +-
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c |   3 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c      |   2 +
 drivers/gpu/nvgpu/gk20a/gk20a.h      |   2 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c   | 131 +++++++++++++++++++++++++++--------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h   |   6 +-
 7 files changed, 121 insertions(+), 36 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a')

diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 87b32add..b6b38541 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -38,7 +38,8 @@ static void release_as_share_id(struct gk20a_as *as, int id)
 }
 
 int gk20a_as_alloc_share(struct gk20a_as *as,
-			 u32 flags, struct gk20a_as_share **out)
+			 u32 big_page_size, u32 flags,
+			 struct gk20a_as_share **out)
 {
 	struct gk20a *g = gk20a_from_as(as);
 	struct gk20a_as_share *as_share;
@@ -59,7 +60,7 @@ int gk20a_as_alloc_share(struct gk20a_as *as,
 	err = gk20a_busy(g->dev);
 	if (err)
 		goto failed;
-	err = g->ops.mm.vm_alloc_share(as_share, flags);
+	err = g->ops.mm.vm_alloc_share(as_share, big_page_size, flags);
 	gk20a_idle(g->dev);
 
 	if (err)
@@ -332,7 +333,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
 
 	g = container_of(inode->i_cdev, struct gk20a, as.cdev);
 
-	err = gk20a_as_alloc_share(&g->as, 0, &as_share);
+	err = gk20a_as_alloc_share(&g->as, 0, 0, &as_share);
 	if (err) {
 		gk20a_dbg_fn("failed to alloc share");
 		return err;
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
index 166000a8..d347479e 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -42,7 +42,9 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share);
 int gk20a_as_dev_open(struct inode *inode, struct file *filp);
 int gk20a_as_dev_release(struct inode *inode, struct file *filp);
 long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-int gk20a_as_alloc_share(struct gk20a_as *as,
+
+/* if big_page_size == 0, the default big page size is used */
+int gk20a_as_alloc_share(struct gk20a_as *as, u32 big_page_size,
 			 u32 flags, struct gk20a_as_share **out);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 0b6b5913..6dc92713 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -166,7 +166,8 @@ static int gk20a_ctrl_alloc_as(
 		goto clean_up;
 	}
 
-	err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share);
+	err = gk20a_as_alloc_share(&g->as, args->big_page_size, args->flags,
+				   &as_share);
 	if (err)
 		goto clean_up_file;
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0d74099e..a97ec735 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1993,6 +1993,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	    gk20a_platform_has_syncpoints(g->dev))
 		gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
 
+	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
+
 	gpu->gpc_mask = 1;
 
 	g->ops.gr.detect_sm_arch(g);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 3542a597..ff37039f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -370,7 +370,7 @@ struct gpu_ops {
 				   struct vm_gk20a_mapping_batch *batch);
 		void (*vm_remove)(struct vm_gk20a *vm);
 		int (*vm_alloc_share)(struct gk20a_as_share *as_share,
-				      u32 flags);
+				      u32 big_page_size, u32 flags);
 		int (*vm_bind_channel)(struct gk20a_as_share *as_share,
 				       struct channel_gk20a *ch);
 		int (*fb_flush)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 141a37af..a9bca317 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -767,6 +767,12 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 	struct rb_node *node;
 	int i = 0;
 
+	if (vm->userspace_managed) {
+		*mapped_buffers = NULL;
+		*num_buffers = 0;
+		return 0;
+	}
+
 	mutex_lock(&vm->update_gmmu_lock);
 
 	buffer_list = nvgpu_alloc(sizeof(*buffer_list) *
@@ -1135,7 +1141,8 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 
 static int validate_fixed_buffer(struct vm_gk20a *vm,
 				 struct buffer_attrs *bfr,
-				 u64 map_offset, u64 map_size)
+				 u64 map_offset, u64 map_size,
+				 struct vm_reserved_va_node **pva_node)
 {
 	struct device *dev = dev_from_vm(vm);
 	struct vm_reserved_va_node *va_node;
@@ -1154,15 +1161,16 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 		return -EINVAL;
 	}
 
-	/* find the space reservation */
+	/* Find the space reservation, but it's ok to have none for
+	 * userspace-managed address spaces */
 	va_node = addr_to_reservation(vm, map_offset);
-	if (!va_node) {
+	if (!va_node && !vm->userspace_managed) {
 		gk20a_warn(dev, "fixed offset mapping without space allocation");
 		return -EINVAL;
 	}
 
-	/* mapped area should fit inside va */
-	if (map_end > va_node->vaddr_start + va_node->size) {
+	/* Mapped area should fit inside va, if there's one */
+	if (va_node && map_end > va_node->vaddr_start + va_node->size) {
 		gk20a_warn(dev, "fixed offset mapping size overflows va node");
 		return -EINVAL;
 	}
@@ -1177,6 +1185,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 		return -EINVAL;
 	}
 
+	*pva_node = va_node;
+
 	return 0;
 }
 
@@ -1411,16 +1421,28 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	u64 buf_addr;
 	u64 ctag_map_win_size = 0;
 	u32 ctag_map_win_ctagline = 0;
+	struct vm_reserved_va_node *va_node = NULL;
+
+	if (user_mapped && vm->userspace_managed &&
+	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		gk20a_err(d,
+			  "%s: non-fixed-offset mapping not available on userspace managed address spaces",
+			  __func__);
+		return -EFAULT;
+	}
 
 	mutex_lock(&vm->update_gmmu_lock);
 
 	/* check if this buffer is already mapped */
-	map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
-						   flags, kind, sgt,
-						   user_mapped, rw_flag);
-	if (map_offset) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return map_offset;
+	if (!vm->userspace_managed) {
+		map_offset = gk20a_vm_map_duplicate_locked(
+			vm, dmabuf, offset_align,
+			flags, kind, sgt,
+			user_mapped, rw_flag);
+		if (map_offset) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			return map_offset;
+		}
 	}
 
 	/* pin buffer to get phys/iovmm addr */
@@ -1504,7 +1526,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
 		err = validate_fixed_buffer(vm, &bfr,
-					    offset_align, mapping_size);
+					    offset_align, mapping_size,
+					    &va_node);
 		if (err)
 			goto clean_up;
 
@@ -1671,11 +1694,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
 
-	if (!va_allocated) {
-		struct vm_reserved_va_node *va_node;
-
-		/* find the space reservation */
-		va_node = addr_to_reservation(vm, map_offset);
+	if (va_node) {
 		list_add_tail(&mapped_buffer->va_buffers_list,
 			      &va_node->va_buffers_list);
 		mapped_buffer->va_node = va_node;
@@ -1753,18 +1772,27 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 	struct mapped_buffer_node *mapped_buffer;
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct device *d = dev_from_vm(vm);
+	const bool fixed_mapping =
+		(flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
+
+	if (vm->userspace_managed && !fixed_mapping) {
+		gk20a_err(d,
+			  "%s: non-fixed-offset mapping is not available on userspace managed address spaces",
+			  __func__);
+		return -EFAULT;
+	}
 
-	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
-		/* This will be implemented later */
+	if (fixed_mapping && !vm->userspace_managed) {
 		gk20a_err(d,
-			  "%s: fixed-offset compbits mapping not yet supported",
+			  "%s: fixed-offset mapping is available only on userspace managed address spaces",
 			  __func__);
 		return -EFAULT;
 	}
 
 	mutex_lock(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+	mapped_buffer =
+		find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
 
 	if (!mapped_buffer || !mapped_buffer->user_mapped) {
 		mutex_unlock(&vm->update_gmmu_lock);
@@ -1774,7 +1802,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 
 	if (!mapped_buffer->ctags_mappable) {
 		mutex_unlock(&vm->update_gmmu_lock);
-		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
+		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx",
+			  __func__, mapping_gva);
 		return -EFAULT;
 	}
 
@@ -1804,10 +1833,41 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 		cacheline_offset_start =
 			cacheline_start * aggregate_cacheline_sz;
 
+		if (fixed_mapping) {
+			struct buffer_attrs bfr;
+			int err;
+			struct vm_reserved_va_node *va_node = NULL;
+
+			memset(&bfr, 0, sizeof(bfr));
+
+			bfr.pgsz_idx = small_pgsz_index;
+
+			err = validate_fixed_buffer(
+				vm, &bfr, *compbits_win_gva,
+				mapped_buffer->ctag_map_win_size, &va_node);
+
+			if (err) {
+				mutex_unlock(&vm->update_gmmu_lock);
+				return err;
+			}
+
+			if (va_node) {
+				/* this would create a dangling GPU VA
+				 * pointer if the space is freed
+				 * before the buffer is
+				 * unmapped */
+				mutex_unlock(&vm->update_gmmu_lock);
+				gk20a_err(d,
+					  "%s: comptags cannot be mapped into allocated space",
+					  __func__);
+				return -EINVAL;
+			}
+		}
+
 		mapped_buffer->ctag_map_win_addr =
 			g->ops.mm.gmmu_map(
 				vm,
-				0,
+				!fixed_mapping ? 0 : *compbits_win_gva, /* va */
 				g->gr.compbit_store.mem.sgt,
 				cacheline_offset_start, /* sg offset */
 				mapped_buffer->ctag_map_win_size, /* size */
@@ -1828,6 +1888,15 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 				__func__, mapping_gva);
 			return -ENOMEM;
 		}
+	} else if (fixed_mapping && *compbits_win_gva &&
+		   mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d,
+			  "%s: re-requesting comptags map into mismatching address. buffer offset 0x"
+			  "%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
+			  __func__, mapping_gva,
+			  mapped_buffer->ctag_map_win_addr, *compbits_win_gva);
+		return -EINVAL;
 	}
 
 	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
@@ -2662,6 +2731,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		  u64 kernel_reserved,
 		  u64 aperture_size,
 		  bool big_pages,
+		  bool userspace_managed,
 		  char *name)
 {
 	int err, i;
@@ -2685,6 +2755,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
 
+	vm->userspace_managed = userspace_managed;
+
 	vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,
 			vm->big_page_size);
 
@@ -2821,7 +2893,8 @@ clean_up_pdes:
 }
 
 /* address space interfaces for the gk20a module */
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
+			 u32 flags)
 {
 	struct gk20a_as *as = as_share->as;
 	struct gk20a *g = gk20a_from_as(as);
@@ -2829,6 +2902,8 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 	struct vm_gk20a *vm;
 	char name[32];
 	int err;
+	const bool userspace_managed =
+		(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
 
 	gk20a_dbg_fn("");
 
@@ -2856,7 +2931,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
 			    mm->channel.kernel_size,
 			    mm->channel.user_size + mm->channel.kernel_size,
-			    !mm->disable_bigpage, name);
+			    !mm->disable_bigpage, userspace_managed, name);
 
 	return err;
 }
@@ -3235,7 +3310,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
 	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
 		      mm->bar1.aperture_size - SZ_4K,
-		      mm->bar1.aperture_size, false, "bar1");
+		      mm->bar1.aperture_size, false, false, "bar1");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
@@ -3263,7 +3338,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 
 	gk20a_init_vm(mm, vm, big_page_size,
 		      SZ_4K * 16, GK20A_PMU_VA_SIZE,
-		      GK20A_PMU_VA_SIZE * 2, false,
+		      GK20A_PMU_VA_SIZE * 2, false, false,
 		      "system");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
@@ -3303,7 +3378,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 			    SZ_4K * 16,
 			    NV_MM_DEFAULT_KERNEL_SIZE,
 			    NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-			    false, "cde");
+			    false, false, "cde");
 }
 
 void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7be4383b..2dd4ccf5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -251,6 +251,8 @@ struct vm_gk20a {
 
 	u32 big_page_size;
 
+	bool userspace_managed;
+
 	const struct gk20a_mmu_level *mmu_levels;
 
 	struct kref ref;
@@ -586,7 +588,8 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 /* vm-as interface */
 struct nvgpu_as_alloc_space_args;
 struct nvgpu_as_free_space_args;
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags);
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
+			 u32 flags);
int gk20a_vm_release_share(struct gk20a_as_share *as_share);
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 			 struct nvgpu_as_alloc_space_args *args);
@@ -621,6 +624,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		  u64 kernel_reserved,
 		  u64 aperture_size,
 		  bool big_pages,
+		  bool userspace_managed,
 		  char *name);
 void gk20a_deinit_vm(struct vm_gk20a *vm);
 
-- 
cgit v1.2.2
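
For illustration, a minimal userspace sketch of the new flag follows. It is
not part of the patch: the control-device path, the <linux/nvgpu.h> uapi
header, and the exact nvgpu_alloc_as_args field layout are assumptions about
the nvgpu UAPI of this period; only NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED,
the fixed-offset-only mapping rule, and the
NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS capability bit come from the
patch itself.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* assumed uapi header location */

static int alloc_userspace_managed_as(void)
{
	struct nvgpu_alloc_as_args args = { 0 };
	/* assumed device node; the ctrl device varies by platform */
	int ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (ctrl_fd < 0)
		return -1;

	args.big_page_size = 0;	/* 0 selects the default big page size */
	args.flags = NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_AS, &args)) {
		close(ctrl_fd);
		return -1;
	}
	close(ctrl_fd);

	/*
	 * Per this patch, every map on the returned address space must
	 * pass NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET with a
	 * caller-chosen GPU VA; non-fixed-offset maps fail with -EFAULT,
	 * and a prior VA space reservation is only needed to mark a
	 * region sparse.
	 */
	return args.as_fd;
}

A client would typically check the NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS
bit in the GPU characteristics (set unconditionally by this patch in
gk20a_init_gpu_characteristics()) before requesting a userspace-managed
address space.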