From ee26a2842ca891d3ae8b1de1b066d29234fc0115 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Tue, 24 May 2022 21:11:59 -0400
Subject: gpu-paging: Initial working implementation

Supports synchronous page-out or page-in of a specific buffer. Includes
a fast reverse struct nvgpu_mapped_buf lookup. Requires an initial set
of changes to nvmap as well.
---
 drivers/gpu/nvgpu/os/linux/vm.c | 70 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c
index 8956cce5..fcb58ac4 100644
--- a/drivers/gpu/nvgpu/os/linux/vm.c
+++ b/drivers/gpu/nvgpu/os/linux/vm.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/dma-buf.h>
+#include <linux/nvmap.h>
 #include <linux/scatterlist.h>
 #include <uapi/linux/nvgpu.h>
 
@@ -71,7 +72,23 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
 {
 	struct nvgpu_rbtree_node *node = NULL;
 	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+	struct list_head *nvmap_priv;
+
+	// Try fast lookup first
+	if (!IS_ERR(nvmap_priv = nvmap_get_priv_list(dmabuf))) {
+		struct nvgpu_mapped_buf *mapped_buffer;
+		struct nvgpu_mapped_buf_priv *priv;
+
+		list_for_each_entry(priv, nvmap_priv, nvmap_priv_entry) {
+			mapped_buffer = container_of(priv, struct nvgpu_mapped_buf, os_priv);
+			if (mapped_buffer->os_priv.dmabuf == dmabuf &&
+			    mapped_buffer->kind == kind)
+				return mapped_buffer;
+		}
+	}
 
+	// Fall back to a full traversal (not an nvmap buffer?)
+	printk(KERN_INFO "nvgpu: Fast reverse lookup failed!\n");
 	nvgpu_rbtree_enum_start(0, &node, root);
 
 	while (node) {
@@ -158,6 +175,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 	 */
 	gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment,
 		       mapped_buffer->os_priv.sgt);
+	list_del(&mapped_buffer->os_priv.nvmap_priv_entry);
 	dma_buf_put(os_buf->dmabuf);
 
 	nvgpu_log(g, gpu_dbg_map,
@@ -198,6 +216,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	struct nvgpu_sgt *nvgpu_sgt = NULL;
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct dma_buf_attachment *attachment;
+	struct list_head *nvmap_priv;
 	int err = 0;
 
 	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
@@ -243,6 +262,12 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	mapped_buffer->os_priv.dmabuf = dmabuf;
 	mapped_buffer->os_priv.attachment = attachment;
 	mapped_buffer->os_priv.sgt = sgt;
+	nvmap_priv = nvmap_get_priv_list(dmabuf);
+	if (!IS_ERR(nvmap_priv))
+		list_add(&mapped_buffer->os_priv.nvmap_priv_entry, nvmap_priv);
+	else
+		// So we can always safely call list_del()
+		INIT_LIST_HEAD(&mapped_buffer->os_priv.nvmap_priv_entry);
 
 	*gpu_va = mapped_buffer->addr;
 	return 0;
@@ -353,6 +378,49 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
 	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
 		       mapped_buffer->os_priv.attachment,
 		       mapped_buffer->os_priv.sgt);
-
+	list_del(&mapped_buffer->os_priv.nvmap_priv_entry);
 	dma_buf_put(mapped_buffer->os_priv.dmabuf);
 }
+
+/**
+ * Given an nvgpu_mapped_buf m, map m->os_priv.sgt into m->addr.
+ * Very similar to nvgpu_vm_map_buffer, except that this assumes all necessary
+ * PTEs and PDEs have been created. This merely updates the physical address(es)
+ * in the associated PTEs, leaving all other attributes unchanged.
+ *
+ * NOP if sgt is already mapped for addr.
+ *
+ * vm->gmmu_update_lock must be held.
+ *
+ * Caller is responsible for flushing the TLB and L2 caches.
+ */
+void nvgpu_vm_remap(struct nvgpu_mapped_buf *m)
+{
+	// TODO: Input validation
+	struct scatterlist *sg;
+	unsigned int i = 0;
+	u64 curr_vaddr = m->addr;
+
+	// For each element of the scatterlist
+	// (based off the for_each_sgtable_dma_sg() macro in newer kernels)
+	for_each_sg(m->os_priv.sgt->sgl, sg, m->os_priv.sgt->nents, i) {
+		unsigned int sg_off = 0;
+		// Keep mapping data at the next unmapped virtual address
+		// until each scatterlist element is entirely mapped
+		while (sg_off < sg_dma_len(sg)) {
+			int amt_mapped = __nvgpu_update_paddr(gk20a_from_vm(m->vm),
+							      m->vm,
+							      curr_vaddr,
+							      sg_dma_address(sg) + sg_off);
+			if (amt_mapped < 0) {
+				printk(KERN_ERR "nvgpu: Error %d from __nvgpu_update_paddr() in nvgpu_vm_remap()! Had mapped %llu of %llu bytes.\n", amt_mapped, curr_vaddr - m->addr, m->size);
+				return;
+			}
+			curr_vaddr += amt_mapped;
+			sg_off += amt_mapped;
+		}
+	}
+	if (curr_vaddr != m->addr + m->size) {
+		printk(KERN_ERR "nvgpu: Mapped %llu bytes when %llu bytes expected! Expect page table corruption!\n", curr_vaddr - m->addr, m->size);
+	}
+}
-- 
cgit v1.2.2
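
The "fast reverse lookup" above hinges on one pattern: embedding a list_head in each mapping's OS-private data so container_of() can recover the enclosing struct from a short per-dmabuf list, instead of enumerating the rbtree of every mapping in the VM. Below is a minimal, self-contained user-space sketch of that pattern; struct mapping, priv_entry, and find_mapping are hypothetical stand-ins for struct nvgpu_mapped_buf, os_priv.nvmap_priv_entry, and __nvgpu_vm_find_mapped_buf_reverse(), and the list plumbing is re-implemented so the sketch compiles outside the kernel.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct list_head {
	struct list_head *next, *prev;
};

/* Insert new right after head (same semantics as the kernel's list_add) */
static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

struct mapping {
	int dmabuf_id;			/* stands in for the struct dma_buf pointer */
	unsigned int kind;
	struct list_head priv_entry;	/* mirrors os_priv.nvmap_priv_entry */
};

/* Walk the short per-dmabuf list instead of the rbtree of all mappings */
static struct mapping *find_mapping(struct list_head *head, int id,
				    unsigned int kind)
{
	struct list_head *pos;

	for (pos = head->next; pos != head; pos = pos->next) {
		struct mapping *m = container_of(pos, struct mapping, priv_entry);

		if (m->dmabuf_id == id && m->kind == kind)
			return m;
	}
	return NULL;	/* caller falls back to the full rbtree walk */
}

int main(void)
{
	struct list_head head = { &head, &head };
	struct mapping a = { .dmabuf_id = 1, .kind = 0 };
	struct mapping b = { .dmabuf_id = 2, .kind = 0 };

	list_add(&a.priv_entry, &head);
	list_add(&b.priv_entry, &head);
	printf("found mapping for dmabuf %d\n",
	       find_mapping(&head, 2, 0)->dmabuf_id);
	return 0;
}

This layout trick is also why the patch can INIT_LIST_HEAD() the entry for non-nvmap buffers: deleting a self-linked node only rewrites that node's own pointers, so the unmap paths can call list_del() unconditionally.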
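nvgpu_vm_remap() itself is a cursor walk: for each physical segment, keep calling the PTE-update helper and advance both the virtual-address cursor and the in-segment offset by however many bytes the helper covered, then check that the cursor landed exactly at addr + size. Here is a rough user-space sketch of that loop under stated assumptions: struct seg and update_paddr() are hypothetical stand-ins for struct scatterlist and __nvgpu_update_paddr(), and each update is assumed to cover exactly one 4 KiB page (the real amount depends on the page size the PTE maps).

#include <stdio.h>

#define PAGE_SIZE 4096ULL

struct seg {
	unsigned long long dma_addr;	/* stands in for sg_dma_address() */
	unsigned long long len;		/* stands in for sg_dma_len() */
};

/* Pretend to rewrite the PTE backing vaddr; return the bytes it covered */
static long long update_paddr(unsigned long long vaddr, unsigned long long paddr)
{
	printf("PTE: va 0x%llx -> pa 0x%llx\n", vaddr, paddr);
	return PAGE_SIZE;
}

int main(void)
{
	/* Two discontiguous physical segments backing one 3-page buffer */
	struct seg segs[] = {
		{ 0x80000000ULL, 2 * PAGE_SIZE },
		{ 0x90000000ULL, 1 * PAGE_SIZE },
	};
	unsigned long long base = 0x100000000ULL;
	unsigned long long size = 3 * PAGE_SIZE;
	unsigned long long curr_vaddr = base;
	unsigned int i;

	for (i = 0; i < sizeof(segs) / sizeof(segs[0]); i++) {
		unsigned long long sg_off = 0;

		/* Advance until this segment is entirely mapped */
		while (sg_off < segs[i].len) {
			long long n = update_paddr(curr_vaddr,
						   segs[i].dma_addr + sg_off);
			if (n < 0)
				return 1;	/* the driver logs and bails out here */
			curr_vaddr += (unsigned long long)n;
			sg_off += (unsigned long long)n;
		}
	}
	/* Same consistency check as the end of nvgpu_vm_remap() */
	if (curr_vaddr != base + size)
		printf("mapped %llu of %llu bytes!\n", curr_vaddr - base, size);
	return 0;
}

The final check mirrors the one in the patch: if the cursor does not land exactly at base + size, the scatterlist and the GPU mapping disagree about the buffer's length, which in the real driver would mean stale or corrupt PTEs.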