From ff66847a00ac27d8d94b3664ec156a195dbf3676 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Wed, 25 May 2022 22:01:24 -0400
Subject: gpu-paging: Split swap in/out to prepare for async support.

---
 drivers/gpu/nvgpu/os/linux/ioctl_as.c | 268 ++++++++++++++++++++++++----------------
 include/uapi/linux/nvgpu.h            |  10 +-
 2 files changed, 175 insertions(+), 103 deletions(-)

diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 9708ea1a..af6cdb5b 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -332,75 +332,68 @@ int gk20a_as_dev_release(struct inode *inode, struct file *filp)
 	return gk20a_as_release_share(as_share);
 }
 
-#define OLD_WALK 0
 
 /* Access dmabuf associated with passed file descriptor, copy the associated
  * pages to an NVME drive, unpin associated pages from DMA'able space, and free
  * said pages for use by others.
  * dmabuf is put in a deallocated state, and any GPU mappings will be
  * invalidated. To restore the dmabuf, see nvgpu_as_ioctl_read_swap_buffer().
+ * ...
+ * Starts a swap-out operation by flushing the GPU L2 and starting I/O.
+ * vm->update_gmmu_lock /must/ already be held.
  */
 static int nvgpu_as_ioctl_write_swap_buffer(
-		struct gk20a_as_share *as_share,
-		struct nvgpu_as_swap_buffer_args *args)
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
 {
-	struct gk20a *g = gk20a_from_vm(as_share->vm);
 	int err = 0;
-#if OLD_WALK
-	struct nvgpu_rbtree_node *node;
-#endif
-	struct nvgpu_mapped_buf *m;
-	struct sg_table *sgt;
-	struct vm_gk20a *vm = as_share->vm;
-	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
 
 	nvgpu_log_fn(g, " ");
 
-	if (IS_ERR(dmabuf))
-		return PTR_ERR(dmabuf);
-
-	// Other code walking vm->mapped_buffers grabs this lock
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-#if OLD_WALK
-	// Get mapped buffer corresponding to this dmabuf
-	// TODO: Error on buffer mapped >1
-	for_each_buffer(node, vm->mapped_buffers, m) {
-		if (m->os_priv.dmabuf == dmabuf)
-			break;
-	}
-	// If failed search
-	if (!node || !m) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#else
-	m = dmabuf_to_mapped_buf(dmabuf);
-	// If failed search
-	if (IS_ERR(m)) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#endif
-
 	// Disable an annoying custom out-of-tree "feature" of dma_buf which defers unmap
 	if (dma_buf_disable_lazy_unmapping(dev_from_vm(vm))) {
 		err = -ENOTRECOVERABLE;
-		goto out_put_unlock;
+		goto out;
 	}
 
+	// TODO: Verify that we'll likely be able to free the pages later
+	// before we start the copy.
+
 	// Flush dirty GPU L2 cache lines to DRAM
 	// (Assuming that NVMe DRAM accesses are uncached)
 	gk20a_mm_l2_flush(g, false);
 
-	// Copy out (blocking)
+	// Copy out (blocking) TODO: non-blocking
+	// Could fail on inaccessible swap device, etc
 	err = copy_out(m->os_priv.sgt);
-	if (err) {
-		// Inaccessible swap device, etc
-		goto out_put_unlock;
-	}
+
+out:
+	return err;
+}
+
+/* Finish a swap-out operation by waiting on I/O to complete, then unpinning
+ * and freeing the pages.
+ *
+ * Note that this may fail even if nvgpu_as_ioctl_write_swap_buffer()
+ * succeeded if the user mmaps the buffer before finishing the swap-out.
+ *
+ * vm->update_gmmu_lock /must/ already be held.
+ */
+static int nvgpu_as_ioctl_write_swap_buffer_finish(
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
+{
+	struct sg_table *sgt;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	// Wait for the pages to get written out
+	//wait_for_completion_io(m->os_priv.swap_completion);
 
 	// Unpin needs to happen after copy out is done
 	// (No return value check as it's a void function)
@@ -416,67 +409,30 @@ static int nvgpu_as_ioctl_write_swap_buffer(
 		sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
 				   &m->os_priv.attachment);
 		m->os_priv.sgt = sgt;
-		goto out_put_unlock;
 	}
 
-out_put_unlock:
-	// Done with dmabuf, so release our ref to it
-	dma_buf_put(dmabuf);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
 	return err;
 }
 
-// Undoes everything nvgpu_as_ioctl_write_swap_buffer() does
+/* Starts a swap-in operation by allocating and pinning backing pages, and
+ * starting I/O.
+ * vm->update_gmmu_lock /must/ already be held.
+ */
 static int nvgpu_as_ioctl_read_swap_buffer(
-		struct gk20a_as_share *as_share,
-		struct nvgpu_as_swap_buffer_args *args)
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
 {
-	struct gk20a *g = gk20a_from_vm(as_share->vm);
-	int err = 0;
-#if OLD_WALK
-	struct nvgpu_rbtree_node *node;
-#endif
-	struct nvgpu_mapped_buf *m;
 	struct sg_table *sgt;
-	struct vm_gk20a *vm = as_share->vm;
-	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
+	int err = 0;
 
 	nvgpu_log_fn(g, " ");
 
-	if (!dmabuf)
-		return -EBADF;
-	// Other code walking vm->mapped_buffers grabs this lock
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-#if OLD_WALK
-	// Get mapped buffer corresponding to this dmabuf
-	// TODO: Error on buffer mapped >1
-	for_each_buffer(node, vm->mapped_buffers, m) {
-		if (m->os_priv.dmabuf == dmabuf)
-			break;
-	}
-	// If failed search
-	if (!node || !m) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#else
-	m = dmabuf_to_mapped_buf(dmabuf);
-	// If failed search
-	if (IS_ERR(m)) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#endif
-
 	// Reallocate space for this buffer
 	err = nvmap_realloc_dmabuf(dmabuf);
-	if (err) {
-		// Out of memory (?)
-		goto out_put_unlock;
-	}
+	if (err)
+		goto out; // Out of memory (?)
 
 	// Repin the buffer to DMA'able memory
 	sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
@@ -485,14 +441,15 @@ static int nvgpu_as_ioctl_read_swap_buffer(
 		// Rollback allocation
 		err = nvmap_dealloc_dmabuf(dmabuf);
 		if (err)
-			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! Consider dmabuf FD %d to be in an inconsistent state!\n", err, args->dmabuf_fd);
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! Consider dmabuf '%s' to be in an inconsistent state!\n", err, dmabuf->exp_name);
 		err = PTR_ERR(sgt);
-		goto out_put_unlock;
+		goto out;
 	}
 	// Do any bookkeeping not done by gk20a_mm_pin()
 	m->os_priv.sgt = sgt;
 
 	// Reload page contents from disk (blocking)
+	// TODO: non-blocking
 	err = copy_in(sgt);
 	if (err) {
 		int err2;
@@ -501,20 +458,126 @@ static int nvgpu_as_ioctl_read_swap_buffer(
 				m->os_priv.attachment, m->os_priv.sgt);
 		err2 = nvmap_dealloc_dmabuf(dmabuf);
 		if (err2)
-			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! Consider dmabuf FD %d to be in an inconsistent state!\n", err2, args->dmabuf_fd);
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! Consider dmabuf '%s' to be in an inconsistent state!\n", err2, dmabuf->exp_name);
 		// Inaccessible swap device, etc
-		goto out_put_unlock;
+		goto out;
 	}
+
+out:
+	return err;
+}
+
+/* Finish a swap-in operation by mapping the pages and waiting on I/O to
+ * complete.
+ */
+static int nvgpu_as_ioctl_read_swap_buffer_finish(
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
+{
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
 	// Update GPU page tables (PT) to point to new allocation
 	nvgpu_vm_remap(m);
 	// Due to PT update, translation lookaside buffer needs clearing
 	g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	// Invalidate L2 so that TLB refill does not load stale PT
 	gk20a_mm_l2_flush(g, true);
 
+	// Wait for read to complete if it hasn't yet
+	//wait_for_completion_io(m->os_priv.swap_completion);
+
+	return err;
+}
+
+#define NVGPU_SWAP_ALL -1
+
+/* All swap functions require some common boilerplate. This function serves as
+ * a common entrypoint for all swap functions by handling that boilerplate,
+ * which includes input validation and locking for all functions.
+ * @param cmd IOCTL command code
+ */
+static int nvgpu_as_dev_ioctl_swap(
+		unsigned int cmd,
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_swap_buffer_args *args) {
+	struct vm_gk20a *vm = as_share->vm;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *m;
+	struct dma_buf *dmabuf;
+	int err;
+
+	// Other code walking vm->mapped_buffers grabs this lock
+	// Note that we don't really need to do this before getting the dmabuf,
+	// but we do for now to limit code complexity.
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	// Grab dmabuf and mapped_buf (if necessary) depending on op type
+	if (args->dmabuf_fd != NVGPU_SWAP_ALL) {
+		// If not swapping out everything, get dmabuf, then mapped_buf
+		dmabuf = dma_buf_get(args->dmabuf_fd);
+		if (IS_ERR(dmabuf)) {
+			err = PTR_ERR(dmabuf);
+			goto out_unlock;
+		}
+		// Get mapped buffer corresponding to this dmabuf
+		m = dmabuf_to_mapped_buf(dmabuf);
+		// If failed search
+		if (IS_ERR(m)) {
+			// No mapped dmabuf associated with FD
+			err = -EBADFD;
+			goto out_put_unlock;
+		}
+	} else {
+		// When swapping everything, we get buffers by walking the
+		// mapped_buf rbtree and then use those to get the dmabuf.
+		// TODO
+		//dmabuf = mapped_buf_to_dmabuf(m);
+		err = -EBADFD; // Not yet supported
+		goto out_unlock;
+	}
+
+	// Run appropriate command
+	// XXX: Validate that buffer state is valid for the requested command.
+	switch (cmd) {
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER:
+		// Just a combo of the _ASYNC versions. Saves caller a lock,
+		// some lookups, and an extra syscall. Partially kept for
+		// legacy reasons.
+		err = nvgpu_as_ioctl_read_swap_buffer(dmabuf, m, vm, g);
+		if (err)
+			goto out_put_unlock;
+		err = nvgpu_as_ioctl_read_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC:
+		err = nvgpu_as_ioctl_read_swap_buffer(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_ioctl_read_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER:
+		// See comment on NVGPU_AS_IOCTL_READ_SWAP_BUFFER above
+		err = nvgpu_as_ioctl_write_swap_buffer(dmabuf, m, vm, g);
+		if (err)
+			goto out_put_unlock;
+		err = nvgpu_as_ioctl_write_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC:
+		err = nvgpu_as_ioctl_write_swap_buffer(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_ioctl_write_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	default:
+		err = -ENOTTY;
+		break;
+	}
 
 out_put_unlock:
 	// Done with dmabuf, so release our ref to it
 	dma_buf_put(dmabuf);
+out_unlock:
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 	return err;
 }
@@ -602,11 +665,12 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			(struct nvgpu_as_get_sync_ro_map_args *)buf);
 		break;
 	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER:
-		err = nvgpu_as_ioctl_read_swap_buffer(as_share,
-			(struct nvgpu_as_swap_buffer_args *)buf);
-		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC:
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH:
 	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER:
-		err = nvgpu_as_ioctl_write_swap_buffer(as_share,
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC:
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_dev_ioctl_swap(cmd, as_share,
 			(struct nvgpu_as_swap_buffer_args *)buf);
 		break;
 	default:
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 0138b720..b8ea59a1 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -2206,9 +2206,17 @@ struct nvgpu_as_swap_buffer_args {
 	_IOW(NVGPU_AS_IOCTL_MAGIC, 13, struct nvgpu_as_swap_buffer_args)
 #define NVGPU_AS_IOCTL_READ_SWAP_BUFFER \
 	_IOW(NVGPU_AS_IOCTL_MAGIC, 14, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 15, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 16, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 17, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 18, struct nvgpu_as_swap_buffer_args)
 
 #define NVGPU_AS_IOCTL_LAST \
-	_IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER)
+	_IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH)
 #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_as_map_buffer_ex_args)
-- 
cgit v1.2.2
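Note (after the signature, so outside the patch proper): as of this commit the I/O itself still blocks in copy_out()/copy_in() (see the TODOs), so the _ASYNC/_ASYNC_FINISH pairs only stage the interface for later overlap. Below is a minimal user-space sketch of how the swap-out pair is intended to be driven. It is illustrative only: the AS fd setup (e.g. via /dev/nvhost-as-gpu) and the prior mapping of the dmabuf into that address space are assumed; only the dmabuf_fd field of struct nvgpu_as_swap_buffer_args and the ioctl names are taken from the patch.

	/* Hedged sketch -- not part of this patch. */
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>

	static int swap_out_split(int as_fd, int dmabuf_fd)
	{
		struct nvgpu_as_swap_buffer_args args = {
			.dmabuf_fd = dmabuf_fd,	/* buffer already mapped in this AS */
		};

		/* Start swap-out: flushes GPU L2 and kicks off the copy. */
		if (ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC, &args))
			return -1;

		/* ...other CPU/GPU work can overlap the NVMe write here... */

		/* Wait for I/O, then unpin and free the backing pages. */
		return ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH, &args);
	}

Swap-in mirrors this with NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC and its _FINISH counterpart; the non-ASYNC ioctls remain as a one-syscall combination of both steps.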
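The commented-out wait_for_completion_io(m->os_priv.swap_completion) lines mark where the eventual waits belong. One plausible shape for that plumbing, assuming a 'struct completion swap_completion' field is later added to os_priv and copy_out()/copy_in() are converted to submit block I/O without waiting (every nvgpu_swap_* name here is hypothetical; only the completion and bio APIs are standard kernel interfaces):

	/* Hedged sketch -- swap_completion does not exist in this patch yet. */
	#include <linux/completion.h>
	#include <linux/bio.h>

	/* bi_end_io callback: runs in I/O completion context when the
	 * last sector has been transferred. */
	static void nvgpu_swap_io_done(struct bio *bio)
	{
		struct completion *done = bio->bi_private;

		complete(done);
		bio_put(bio);
	}

	/* Submission side, inside a non-blocking copy_out()/copy_in():
	 * start the transfer and return immediately. */
	static void nvgpu_swap_submit(struct bio *bio, struct completion *done)
	{
		reinit_completion(done);
		bio->bi_private = done;
		bio->bi_end_io = nvgpu_swap_io_done;
		submit_bio(bio);	/* I/O proceeds asynchronously */
	}

With something like this in place, the *_FINISH paths would simply call wait_for_completion_io(&m->os_priv.swap_completion) before unpinning (swap-out) or around the TLB/L2 maintenance (swap-in), matching where the commented-out calls sit in the patch.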