From ee26a2842ca891d3ae8b1de1b066d29234fc0115 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Tue, 24 May 2022 21:11:59 -0400
Subject: gpu-paging: Initial working implementation

Supports synchronous page-out or page-in of a specific buffer. Includes
a fast reverse struct mapped_buf lookup. Requires an initial set of
changes to nvmap as well.
---
 drivers/gpu/nvgpu/os/linux/ioctl_as.c | 197 ++++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)

diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index f0cec178..9708ea1a 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -32,6 +32,9 @@
 #include "platform_gk20a.h"
 #include "ioctl_as.h"
 #include "os_linux.h"
+#include <linux/nvmap.h>	// For nvmap_{de,re}alloc_dmabuf()
+#include "dmabuf.h"		// struct dma_buf things for swapping
+#include "swap.h"
 
 static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags)
 {
@@ -329,6 +332,192 @@ int gk20a_as_dev_release(struct inode *inode, struct file *filp)
 	return gk20a_as_release_share(as_share);
 }
 
+#define OLD_WALK 0
+
+/* Access the dmabuf associated with the passed file descriptor, copy the
+ * associated pages to an NVMe drive, unpin the pages from DMA'able space,
+ * and free them for use by others.
+ * The dmabuf is put in a deallocated state, and any GPU mappings will be
+ * invalidated. To restore the dmabuf, see nvgpu_as_ioctl_read_swap_buffer().
+ */
+static int nvgpu_as_ioctl_write_swap_buffer(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_swap_buffer_args *args)
+{
+	struct gk20a *g = gk20a_from_vm(as_share->vm);
+	int err = 0;
+#if OLD_WALK
+	struct nvgpu_rbtree_node *node;
+#endif
+	struct nvgpu_mapped_buf *m;
+	struct sg_table *sgt;
+	struct vm_gk20a *vm = as_share->vm;
+	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
+
+	nvgpu_log_fn(g, " ");
+
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	// Other code walking vm->mapped_buffers grabs this lock
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+#if OLD_WALK
+	// Get the mapped buffer corresponding to this dmabuf
+	// TODO: Error if the buffer is mapped more than once
+	for_each_buffer(node, vm->mapped_buffers, m) {
+		if (m->os_priv.dmabuf == dmabuf)
+			break;
+	}
+	// If the search failed
+	if (!node || !m) {
+		// No mapped dmabuf associated with FD
+		err = -EBADFD;
+		goto out_put_unlock;
+	}
+#else
+	m = dmabuf_to_mapped_buf(dmabuf);
+	// If the search failed
+	if (IS_ERR(m)) {
+		// No mapped dmabuf associated with FD
+		err = -EBADFD;
+		goto out_put_unlock;
+	}
+#endif
+
+	// Disable an annoying custom out-of-tree dma_buf "feature" which defers unmap
+	if (dma_buf_disable_lazy_unmapping(dev_from_vm(vm))) {
+		err = -ENOTRECOVERABLE;
+		goto out_put_unlock;
+	}
+
+	// Flush dirty GPU L2 cache lines to DRAM
+	// (Assuming that NVMe accesses to DRAM are uncached)
+	gk20a_mm_l2_flush(g, false);
+
+	// Copy out (blocking)
+	err = copy_out(m->os_priv.sgt);
+	if (err) {
+		// Inaccessible swap device, etc.
+		goto out_put_unlock;
+	}
+
+	// Unpin needs to happen after the copy-out is done
+	// (No return value check as it's a void function)
+	gk20a_mm_unpin(dev_from_vm(vm), m->os_priv.dmabuf,
+		       m->os_priv.attachment, m->os_priv.sgt);
+
+	// Deallocate the dmabuf's backing pages
+	// TODO: Fail early for these cases (where the dmabuf is mmapped, etc.),
+	// before we do all the above (expensive) steps
+	err = nvmap_dealloc_dmabuf(dmabuf);
+	if (err) {
+		// Repin
+		sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
+				   &m->os_priv.attachment);
+		m->os_priv.sgt = sgt;
+		goto out_put_unlock;
+	}
+
+out_put_unlock:
+	// Done with the dmabuf, so release our reference to it
+	dma_buf_put(dmabuf);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	return err;
+}
+
+// Undoes everything nvgpu_as_ioctl_write_swap_buffer() does
+static int nvgpu_as_ioctl_read_swap_buffer(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_swap_buffer_args *args)
+{
+	struct gk20a *g = gk20a_from_vm(as_share->vm);
+	int err = 0;
+#if OLD_WALK
+	struct nvgpu_rbtree_node *node;
+#endif
+	struct nvgpu_mapped_buf *m;
+	struct sg_table *sgt;
+	struct vm_gk20a *vm = as_share->vm;
+	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
+
+	nvgpu_log_fn(g, " ");
+
+	// dma_buf_get() returns an ERR_PTR, never NULL, on failure
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	// Other code walking vm->mapped_buffers grabs this lock
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+#if OLD_WALK
+	// Get the mapped buffer corresponding to this dmabuf
+	// TODO: Error if the buffer is mapped more than once
+	for_each_buffer(node, vm->mapped_buffers, m) {
+		if (m->os_priv.dmabuf == dmabuf)
+			break;
+	}
+	// If the search failed
+	if (!node || !m) {
+		// No mapped dmabuf associated with FD
+		err = -EBADFD;
+		goto out_put_unlock;
+	}
+#else
+	m = dmabuf_to_mapped_buf(dmabuf);
+	// If the search failed
+	if (IS_ERR(m)) {
+		// No mapped dmabuf associated with FD
+		err = -EBADFD;
+		goto out_put_unlock;
+	}
+#endif
+
+	// Reallocate space for this buffer
+	err = nvmap_realloc_dmabuf(dmabuf);
+	if (err) {
+		// Out of memory (?)
+		goto out_put_unlock;
+	}
+
+	// Repin the buffer to DMA'able memory
+	sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
+			   &m->os_priv.attachment);
+	if (IS_ERR(sgt)) {
+		// Roll back the allocation
+		err = nvmap_dealloc_dmabuf(dmabuf);
+		if (err)
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! "
+			       "Consider dmabuf FD %d to be in an inconsistent state!\n",
+			       err, args->dmabuf_fd);
+		err = PTR_ERR(sgt);
+		goto out_put_unlock;
+	}
+	// Do any bookkeeping not done by gk20a_mm_pin()
+	m->os_priv.sgt = sgt;
+
+	// Reload page contents from disk (blocking)
+	err = copy_in(sgt);
+	if (err) {
+		int err2;
+		// Roll back the pinning and allocation
+		gk20a_mm_unpin(dev_from_vm(vm), m->os_priv.dmabuf,
+			       m->os_priv.attachment, m->os_priv.sgt);
+		err2 = nvmap_dealloc_dmabuf(dmabuf);
+		if (err2)
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! "
+			       "Consider dmabuf FD %d to be in an inconsistent state!\n",
+			       err2, args->dmabuf_fd);
+		// Inaccessible swap device, etc.
+		goto out_put_unlock;
+	}
+	// Update the GPU page tables (PTs) to point at the new allocation
+	nvgpu_vm_remap(m);
+	// Due to the PT update, the translation lookaside buffer needs clearing
+	g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
+	// Invalidate L2 so that a TLB refill does not load stale PT entries
+	gk20a_mm_l2_flush(g, true);
+
+out_put_unlock:
+	// Done with the dmabuf, so release our reference to it
+	dma_buf_put(dmabuf);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	return err;
+}
 
 long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
@@ -412,6 +601,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = nvgpu_as_ioctl_get_sync_ro_map(as_share,
 			(struct nvgpu_as_get_sync_ro_map_args *)buf);
 		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER:
+		err = nvgpu_as_ioctl_read_swap_buffer(as_share,
+				(struct nvgpu_as_swap_buffer_args *)buf);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER:
+		err = nvgpu_as_ioctl_write_swap_buffer(as_share,
+				(struct nvgpu_as_swap_buffer_args *)buf);
+		break;
 	default:
 		err = -ENOTTY;
 		break;
-- 
cgit v1.2.2
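
The two new ioctls are issued against an open nvgpu address-space file
descriptor, i.e. the device whose commands gk20a_as_dev_ioctl() serves. The
sketch below is a minimal userspace illustration, not part of the patch: it
assumes the uapi definitions of NVGPU_AS_IOCTL_{WRITE,READ}_SWAP_BUFFER and
struct nvgpu_as_swap_buffer_args are reachable via <linux/nvgpu.h> (header
path assumed), that as_fd is such a descriptor, and that buf_fd is a dmabuf
currently mapped into that address space. Only the ioctl names, the args
struct, and its dmabuf_fd field come from the patch itself.

	/* Sketch: page a mapped dmabuf out to the swap device, then bring it
	 * back. as_fd/buf_fd provenance and the header path are assumptions. */
	#include <linux/nvgpu.h>	/* assumed home of the NVGPU_AS_* uapi */
	#include <stdio.h>
	#include <sys/ioctl.h>

	static int swap_buffer_round_trip(int as_fd, int buf_fd)
	{
		struct nvgpu_as_swap_buffer_args args = { .dmabuf_fd = buf_fd };

		/* Copy the buffer to swap and free its backing pages; the GPU
		 * must not touch the buffer until it has been read back in. */
		if (ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER, &args) < 0) {
			perror("WRITE_SWAP_BUFFER");
			return -1;
		}

		/* Reallocate pages, reload contents, remap, invalidate TLBs. */
		if (ioctl(as_fd, NVGPU_AS_IOCTL_READ_SWAP_BUFFER, &args) < 0) {
			perror("READ_SWAP_BUFFER");
			return -1;
		}
		return 0;
	}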
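
On the "fast reverse struct mapped_buf lookup" from the commit message: the
compiled-out OLD_WALK path scans every node of the vm->mapped_buffers tree
until m->os_priv.dmabuf matches, which is O(n) in the number of mapped
buffers. Its replacement, dmabuf_to_mapped_buf(), ships in the companion
"dmabuf.h" changes and is not shown in this file; all the call sites here pin
down is that failures come back as ERR_PTR values, since callers test the
result with IS_ERR(). The sketch below is one plausible shape under that
assumption, with the back-pointer helper nvgpu_dmabuf_priv_mapped_buf()
invented purely for illustration.

	/* Hypothetical sketch; the real lookup lives in the companion
	 * dmabuf.h changes this patch depends on. */
	static inline struct nvgpu_mapped_buf *
	dmabuf_to_mapped_buf(struct dma_buf *dmabuf)
	{
		/* Invented helper: return a back-pointer recorded in the
		 * dmabuf's per-device private data when it was mapped. */
		struct nvgpu_mapped_buf *m = nvgpu_dmabuf_priv_mapped_buf(dmabuf);

		if (!m)
			return ERR_PTR(-ENOENT);	/* not mapped in this VM */
		return m;	/* O(1) vs. the O(n) OLD_WALK tree scan */
	}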
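
Likewise, copy_out() and copy_in() come from the companion "swap.h"; only
their call sites appear in this file. The contract below is inferred from
those call sites, stated as an assumption rather than read from that header:
both take the buffer's pinned scatter-gather table (m->os_priv.sgt), block
until the swap-device I/O completes (hence the "(blocking)" comments above),
and return 0 on success or a negative errno, e.g. when the swap device is
inaccessible.

	/* Assumed declarations, reconstructed from the call sites above: */
	int copy_out(struct sg_table *sgt);	/* write pages in sgt to swap; blocks */
	int copy_in(struct sg_table *sgt);	/* read them back from swap; blocks */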