author	Joshua Bakita <jbakita@cs.unc.edu>	2022-05-24 21:11:59 -0400
committer	Joshua Bakita <jbakita@cs.unc.edu>	2022-05-24 21:11:59 -0400
commit	ee26a2842ca891d3ae8b1de1b066d29234fc0115 (patch)
tree	a0bf21050569e8d369fc1410860e57158fac761b
parent	46b43d2b2485233397f4f62b9bac6d35434b7aea (diff)
gpu-paging: Initial working implementation
Supports synchronous page out or in of a specific buffer. Includes fast reverse struct mapped_buf lookup. Requires initial set of changes to nvmap as well.
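
For context, a minimal userspace sketch of how the two ioctls added by this patch are intended to be driven. This is an illustration only, not part of the commit; as_fd is assumed to be an open nvgpu address-space device fd (e.g. /dev/nvhost-as-gpu), and dmabuf_fd an nvmap-backed buffer already mapped into that address space:

    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>

    int swap_cycle(int as_fd, int dmabuf_fd)
    {
    	struct nvgpu_as_swap_buffer_args args = { .dmabuf_fd = dmabuf_fd };

    	/* Page out: copy the buffer to swap, unpin, and free its backing pages */
    	if (ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER, &args) < 0)
    		return -1;
    	/* ...the backing memory is available for other use here... */
    	/* Page in: reallocate, copy back, and remap into the GPU page tables */
    	return ioctl(as_fd, NVGPU_AS_IOCTL_READ_SWAP_BUFFER, &args);
    }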
-rw-r--r--	drivers/gpu/nvgpu/Makefile	|   1
-rw-r--r--	drivers/gpu/nvgpu/common/mm/gmmu.c	|  54
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/gmmu.h	|  17
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/linux/vm.h	|   2
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/vm.h	|   2
-rw-r--r--	drivers/gpu/nvgpu/os/linux/dmabuf.c	|   4
-rw-r--r--	drivers/gpu/nvgpu/os/linux/ioctl_as.c	| 197
-rw-r--r--	drivers/gpu/nvgpu/os/linux/swap.h	| 117
-rw-r--r--	drivers/gpu/nvgpu/os/linux/vm.c	|  70
-rw-r--r--	include/uapi/linux/nvgpu.h	|  10
10 files changed, 471 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 8c5b92e1..c23c858a 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -9,6 +9,7 @@ ccflags-y += -I$(srctree.nvgpu-next)/drivers/gpu/nvgpu
 ccflags-y += -I$(srctree)/drivers/devfreq
 
 ccflags-y += -Wno-multichar
+ccflags-y += -Wno-sign-compare
 ccflags-y += -Werror
 ccflags-y += -Wno-error=cpp
 ifeq ($(VERSION),4)
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 748e9f45..a04e501f 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -36,6 +36,9 @@
 
 #include "gk20a/mm_gk20a.h"
 
+// XXX: Shouldn't really be here! Needed for __nvgpu_update_paddr()
+#include <nvgpu/hw/gp10b/hw_gmmu_gp10b.h>
+
 #define __gmmu_dbg(g, attrs, fmt, args...) \
 	do { \
 		if (attrs->debug) { \
@@ -938,3 +941,54 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
 
 	return 0;
 }
+
+u64 pgsz_enum_to_bytes(int sz) {
+	if (sz == GMMU_PAGE_SIZE_SMALL)
+		return SZ_4K;
+	else
+		return SZ_64K; // Dangerous! Big pages may also be 128k. Should check ram_in_big_page_size... registers.
+}
+
+// Caller is responsible for TLB/L2 flushing so that this can be called
+// repeatedly with low overhead.
+int __nvgpu_update_paddr(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u64 paddr)
+{
+	struct nvgpu_gmmu_pd *pd;
+	u32 pd_idx, pd_offs;
+	int err;
+	u32 pte[2]; // Safe for at least gv11b
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+//	u32 pte_orig[2];
+
+	// Get existing pte entry and location
+	err = __nvgpu_locate_pte(g, vm, &vm->pdb,
+			vaddr, 0, &attrs,
+			pte, &pd, &pd_idx, &pd_offs);
+	if (unlikely(err)) {
+		printk(KERN_ERR "nvgpu: Unable to find PTE for vaddr %llx in __nvgpu_update_paddr()\n", vaddr);
+		return err;
+	}
+	// TODO: Verify that the PTE is actually in SYSMEM
+//	pte_orig[0] = pte[0];
+//	pte_orig[1] = pte[1];
+
+	// Following logic is borrowed from __update_pte() for gp10b+
+	// TODO: Make this work for gk20a-gp10b!
+	// Zero-out the address field
+	pte[0] &= ~gmmu_new_pte_address_sys_f(~0 >> gmmu_new_pte_address_shift_v());
+	pte[1] &= ~(~0U >> (24 + gmmu_new_pte_address_shift_v()));
+	// Write new address (upper and lower bits)
+	pte[0] |= gmmu_new_pte_address_sys_f(paddr >> gmmu_new_pte_address_shift_v());
+	pte[1] |= paddr >> (24 + gmmu_new_pte_address_shift_v());
+	// Commit to the page tables
+	pd_write(g, pd, pd_offs, pte[0]);
+	pd_write(g, pd, pd_offs + 1, pte[1]);
+	nvgpu_wmb(); // XXX: Is this needed?
+//	printk(KERN_INFO "nvgpu: Mapped vaddr %llx @ paddr %llx. %lluKb pg. [%08x, %08x]\n", vaddr, paddr, pgsz_enum_to_bytes(attrs.pgsz)/1024, pte[1], pte[0]);
+//	if (pte_orig[0] != pte[0] || pte_orig[1] != pte[1]) {
+//		printk(KERN_INFO "nvgpu: Updated PTE entry from {%x,%x} to {%x, %x}\n", pte_orig[0], pte_orig[1], pte[0], pte[1]);
+//	}
+	return pgsz_enum_to_bytes(attrs.pgsz);
+}
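
A reading aid for the hunk above: a standalone sketch of how __nvgpu_update_paddr() splits the new physical address across the two 32-bit PTE words. This assumes the gp10b+ layout from hw_gmmu_gp10b.h, where gmmu_new_pte_address_shift_v() is 12 (4 KiB alignment) and the low word's address field starts at bit 8; treat these constants as assumptions, not part of the patch:

    /* Illustration only. */
    static void split_paddr_example(u64 paddr, u32 *pte_lo, u32 *pte_hi)
    {
    	*pte_lo = (u32)((paddr >> 12) << 8);	/* paddr bits 12..35 -> pte[0] bits 8..31 */
    	*pte_hi = (u32)(paddr >> 36);		/* paddr bits 36+ -> pte[1] low bits */
    }
    /* e.g. paddr = 0x1234567000 gives pte_lo = 0x23456700, pte_hi = 0x1 */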
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 2fc0d44e..81f829ed 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -354,6 +354,23 @@ int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
  */
 int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
 
+/**
+ * __nvgpu_update_paddr - Remap a virtual address to a new physical address
+ *
+ * @g - The GPU.
+ * @vm - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @paddr - The new physical address to map to.
+ *
+ * This function is a combination of __nvgpu_get_pte() and __nvgpu_set_pte().
+ * It searches for an existing PTE associated with @vaddr, and then updates
+ * only the physical address pointed to in the PTE to @paddr. All other
+ * attributes/fields of the PTE are preserved.
+ *
+ * This function returns the number of bytes mapped on success and -EINVAL
+ * otherwise.
+ */
+int __nvgpu_update_paddr(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u64 paddr);
 
 /*
  * Internal debugging routines. Probably not something you want to use.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
index 6f3beaa9..b86a428a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
@@ -49,6 +49,8 @@ struct nvgpu_mapped_buf_priv {
 	struct dma_buf *dmabuf;
 	struct dma_buf_attachment *attachment;
 	struct sg_table *sgt;
+	// For fast reverse lookup (FD -> mapped_buf)
+	struct list_head nvmap_priv_entry;
 };
 
 /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index 3867c745..f007d880 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -261,6 +261,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
 			struct vm_gk20a_mapping_batch *batch,
 			enum nvgpu_aperture aperture);
 
+void nvgpu_vm_remap(struct nvgpu_mapped_buf *m);
+
 void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
 		struct vm_gk20a_mapping_batch *batch);
 
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.c b/drivers/gpu/nvgpu/os/linux/dmabuf.c
index e8e33130..08f78ae6 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf.c
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf.c
@@ -124,8 +124,10 @@ void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	dma_addr_t dma_addr;
 
-	if (IS_ERR(priv) || !priv)
+	if (IS_ERR(priv) || !priv) {
+		printk(KERN_ERR "nvgpu: Unable to access priv in gk20a_mm_unpin()\n");
 		return;
+	}
 
 	nvgpu_mutex_acquire(&priv->lock);
 	WARN_ON(priv->sgt != sgt);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index f0cec178..9708ea1a 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -32,6 +32,9 @@
 #include "platform_gk20a.h"
 #include "ioctl_as.h"
 #include "os_linux.h"
+#include <linux/nvmap.h> // For nvmap_dmabuf_{d/r}ealloc()
+#include "dmabuf.h" // struct dma_buf things for swapping
+#include "swap.h"
 
 static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags)
 {
@@ -329,6 +332,192 @@ int gk20a_as_dev_release(struct inode *inode, struct file *filp)
 
 	return gk20a_as_release_share(as_share);
 }
+#define OLD_WALK 0
+
+/* Access dmabuf associated with passed file descriptor, copy the associated
+ * pages to an NVMe drive, unpin associated pages from DMA'able space, and free
+ * said pages for use by others.
+ * dmabuf is put in a deallocated state, and any GPU mappings will be
+ * invalidated. To restore the dmabuf, see nvgpu_as_ioctl_read_swap_buffer().
+ */
+static int nvgpu_as_ioctl_write_swap_buffer(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_swap_buffer_args *args)
+{
+	struct gk20a *g = gk20a_from_vm(as_share->vm);
+	int err = 0;
+#if OLD_WALK
+	struct nvgpu_rbtree_node *node;
+#endif
+	struct nvgpu_mapped_buf *m;
+	struct sg_table *sgt;
+	struct vm_gk20a *vm = as_share->vm;
+	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
+
+	nvgpu_log_fn(g, " ");
+
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	// Other code walking vm->mapped_buffers grabs this lock
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+#if OLD_WALK
+	// Get mapped buffer corresponding to this dmabuf
+	// TODO: Error on buffer mapped >1
+	for_each_buffer(node, vm->mapped_buffers, m) {
+		if (m->os_priv.dmabuf == dmabuf)
+			break;
+	}
+	// If failed search
+	if (!node || !m) {
+		// No mapped dmabuf associated with FD
+		err = -EBADFD;
+		goto out_put_unlock;
+	}
+#else
+	m = dmabuf_to_mapped_buf(dmabuf);
+	// If failed search
+	if (IS_ERR(m)) {
+		// No mapped dmabuf associated with FD
+		err = -EBADFD;
+		goto out_put_unlock;
+	}
+#endif
+
+	// Disable an annoying custom out-of-tree "feature" of dma_buf which defers unmap
+	if (dma_buf_disable_lazy_unmapping(dev_from_vm(vm))) {
+		err = -ENOTRECOVERABLE;
+		goto out_put_unlock;
+	}
+
+	// Flush dirty GPU L2 cache lines to DRAM
+	// (Assuming that NVMe DRAM accesses are uncached)
+	gk20a_mm_l2_flush(g, false);
+
+	// Copy out (blocking)
+	err = copy_out(m->os_priv.sgt);
+	if (err) {
+		// Inaccessible swap device, etc.
+		goto out_put_unlock;
+	}
+
+	// Unpin needs to happen after copy out is done
+	// (No return value check as it's a void function)
+	gk20a_mm_unpin(dev_from_vm(vm), m->os_priv.dmabuf,
+			m->os_priv.attachment, m->os_priv.sgt);
+
+	// Deallocate dmabuf's backing pages
+	// TODO: Fail early for these cases (where the dmabuf is mmapped, etc.)
+	// before we do all the above (expensive) steps
+	err = nvmap_dealloc_dmabuf(dmabuf);
+	if (err) {
+		// Repin
+		sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
+				&m->os_priv.attachment);
+		m->os_priv.sgt = sgt;
+		goto out_put_unlock;
+	}
+
+out_put_unlock:
+	// Done with dmabuf, so release our ref to it
+	dma_buf_put(dmabuf);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	return err;
+}
+
+// Undoes everything nvgpu_as_ioctl_write_swap_buffer() does
+static int nvgpu_as_ioctl_read_swap_buffer(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_swap_buffer_args *args)
+{
+	struct gk20a *g = gk20a_from_vm(as_share->vm);
+	int err = 0;
+#if OLD_WALK
+	struct nvgpu_rbtree_node *node;
+#endif
+	struct nvgpu_mapped_buf *m;
+	struct sg_table *sgt;
+	struct vm_gk20a *vm = as_share->vm;
+	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
+
+	nvgpu_log_fn(g, " ");
+
+	if (!dmabuf)
+		return -EBADF;
+	// Other code walking vm->mapped_buffers grabs this lock
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+#if OLD_WALK
+	// Get mapped buffer corresponding to this dmabuf
+	// TODO: Error on buffer mapped >1
+	for_each_buffer(node, vm->mapped_buffers, m) {
+		if (m->os_priv.dmabuf == dmabuf)
+			break;
+	}
+	// If failed search
+	if (!node || !m) {
+		// No mapped dmabuf associated with FD
+		err = -EBADFD;
+		goto out_put_unlock;
+	}
+#else
+	m = dmabuf_to_mapped_buf(dmabuf);
+	// If failed search
+	if (IS_ERR(m)) {
+		// No mapped dmabuf associated with FD
+		err = -EBADFD;
+		goto out_put_unlock;
+	}
+#endif
+
+	// Reallocate space for this buffer
+	err = nvmap_realloc_dmabuf(dmabuf);
+	if (err) {
+		// Out of memory (?)
+		goto out_put_unlock;
+	}
+
+	// Repin the buffer to DMA'able memory
+	sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
+			&m->os_priv.attachment);
+	if (IS_ERR(sgt)) {
+		// Rollback allocation
+		err = nvmap_dealloc_dmabuf(dmabuf);
+		if (err)
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! Consider dmabuf FD %d to be in an inconsistent state!\n", err, args->dmabuf_fd);
+		err = PTR_ERR(sgt);
+		goto out_put_unlock;
+	}
+	// Do any bookkeeping not done by gk20a_mm_pin()
+	m->os_priv.sgt = sgt;
+
+	// Reload page contents from disk (blocking)
+	err = copy_in(sgt);
+	if (err) {
+		int err2;
+		// Rollback pinning and allocation
+		gk20a_mm_unpin(dev_from_vm(vm), m->os_priv.dmabuf,
+				m->os_priv.attachment, m->os_priv.sgt);
+		err2 = nvmap_dealloc_dmabuf(dmabuf);
+		if (err2)
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! Consider dmabuf FD %d to be in an inconsistent state!\n", err2, args->dmabuf_fd);
+		// Inaccessible swap device, etc.
+		goto out_put_unlock;
+	}
+	// Update GPU page tables (PT) to point to new allocation
+	nvgpu_vm_remap(m);
+	// Due to PT update, translation lookaside buffer needs clearing
+	g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
+	// Invalidate L2 so that TLB refill does not load stale PT
+	gk20a_mm_l2_flush(g, true);
+
+out_put_unlock:
+	// Done with dmabuf, so release our ref to it
+	dma_buf_put(dmabuf);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	return err;
+}
 
 long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
@@ -412,6 +601,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = nvgpu_as_ioctl_get_sync_ro_map(as_share,
 			(struct nvgpu_as_get_sync_ro_map_args *)buf);
 		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER:
+		err = nvgpu_as_ioctl_read_swap_buffer(as_share,
+			(struct nvgpu_as_swap_buffer_args *)buf);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER:
+		err = nvgpu_as_ioctl_write_swap_buffer(as_share,
+			(struct nvgpu_as_swap_buffer_args *)buf);
+		break;
 	default:
 		err = -ENOTTY;
 		break;
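
To summarize the control flow added in this file, a condensed sketch of the two paths (names as in the patch; locking, lookup-failure, and rollback branches elided):

    /* WRITE_SWAP_BUFFER (page out), under vm->update_gmmu_lock: */
    m = dmabuf_to_mapped_buf(dmabuf);	/* fast FD -> mapped_buf lookup */
    gk20a_mm_l2_flush(g, false);	/* push dirty GPU L2 lines to DRAM */
    copy_out(m->os_priv.sgt);		/* blocking write to the swap device */
    gk20a_mm_unpin(dev_from_vm(vm), m->os_priv.dmabuf,
    		m->os_priv.attachment, m->os_priv.sgt);
    nvmap_dealloc_dmabuf(dmabuf);	/* free the backing pages */

    /* READ_SWAP_BUFFER (page in) undoes that, then repairs translations: */
    nvmap_realloc_dmabuf(dmabuf);	/* new (likely different) pages */
    sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf, &m->os_priv.attachment);
    copy_in(sgt);			/* blocking read back from swap */
    nvgpu_vm_remap(m);			/* patch new paddrs into the existing PTEs */
    g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
    gk20a_mm_l2_flush(g, true);		/* drop stale page tables from L2 */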
diff --git a/drivers/gpu/nvgpu/os/linux/swap.h b/drivers/gpu/nvgpu/os/linux/swap.h
new file mode 100644
index 00000000..f762ba81
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/swap.h
@@ -0,0 +1,117 @@
+#include <linux/scatterlist.h>
+#include <linux/bio.h>
+//#include <nvgpu/bug.h>
+
+// Queue a command to copy out an SGT to disk
+// TODO: Cache bdev
+// TODO: Asynchronous I/O
+// TODO: Don't hardcode sector 0
+int copy(struct sg_table *sgt, int op) {
+	unsigned int i;
+	struct scatterlist *sg;
+	struct bio *bio;
+	int err = 0;
+	int sg_cnt = sgt->nents;
+	struct bio *bio_orig;
+	sector_t sector = 0; // XXX: For testing
+	// Find and open the block device
+	struct block_device *bdev = blkdev_get_by_path("/dev/nvme0n1", FMODE_READ | FMODE_WRITE, copy);
+	if (unlikely(IS_ERR(bdev))) {
+		printk(KERN_WARNING "Unable to find `nvme0`, err %ld!\n", PTR_ERR(bdev));
+		return -ENODEV;
+	}
+	// Will never fail when allocating <= BIO_MAX_PAGES
+	bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+	bio_orig = bio;
+	bio->bi_bdev = bdev; // Switch to bio_set_dev(bdev) in newer kernels
+	bio->bi_iter.bi_sector = sector;
+	bio_set_op_attrs(bio, op, op == REQ_OP_WRITE ? WRITE_ODIRECT : 0);//REQ_SYNC); // XXX: Is REQ_SYNC necessary?
+	// Copy the scatter-gather table (sgt) into a block I/O vector (bio vec)
+	// bio_chain() approach borrowed from drivers/nvme/target/io-cmd.c:nvmet_execute_rw()
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		// On most iterations, this inner loop shouldn't happen at all. This loop
+		// conditional only triggers if we fill up the bio and are unable to map
+		// the full length of an SGL entry.
+		while (bio_add_page(bio, sg_page(sg), sg_dma_len(sg), sg->offset) != sg_dma_len(sg)) {
+			// Uh oh! We ran out of space in the bio. Allocate a new one and chain it...
+			struct bio *prev = bio;
+			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+			bio->bi_bdev = bdev; // Switch to bio_set_dev(bdev) in newer kernels
+			bio->bi_iter.bi_sector = sector;
+			bio_set_op_attrs(bio, op, op == REQ_OP_WRITE ? WRITE_ODIRECT : 0);
+			bio_chain(bio, prev);
+			// Get the I/O started
+			submit_bio(prev);
+			// No need to call bio_put() as that's automatically managed for chained bios
+		}
+		sector += sg_dma_len(sg) >> 9;
+		sg_cnt--;
+	}
+	// Use blocking submit for now
+	// TODO: Switch to async via submit_bio(bio)
+	err = submit_bio_wait(bio);
+
+	if (bio->bi_error && bio->bi_error != err)
+		printk(KERN_WARNING "nvgpu: bio->bi_error %d != return val from submit_bio_wait() %d\n", bio->bi_error, err);
+
+//out:
+	bio_put(bio_orig); // TODO: Move to completion handler
+	blkdev_put(bdev, FMODE_WRITE|FMODE_READ);
+	return err;
+}
+
+// Patterned off how __nvgpu_vm_find_mapped_buf_reverse() works in vm.c
+// Needs struct nvgpu_rbtree_node *node, struct nvgpu_rbtree_node *root,
+// and struct nvgpu_mapped_buf *m.
+// Steps until end of rbtree OR !m
+#define for_each_buffer(node, root, m) \
+	for (nvgpu_rbtree_enum_start(0, &node, root); \
+	     node && (uintptr_t)(m = mapped_buffer_from_rbtree_node(node)); \
+	     nvgpu_rbtree_enum_next(&node, node))
+
+// New, fast replacement for walking the rbtree with the above macro to find a match
+struct nvgpu_mapped_buf* dmabuf_to_mapped_buf(struct dma_buf *dmabuf) {
+	struct list_head *nvmap_priv = nvmap_get_priv_list(dmabuf);
+	struct nvgpu_mapped_buf *mapped_buffer;
+	struct nvgpu_mapped_buf_priv *priv;
+
+	if (IS_ERR(nvmap_priv))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	priv = list_first_entry_or_null(nvmap_priv, struct nvgpu_mapped_buf_priv, nvmap_priv_entry);
+	if (unlikely(!priv)) {
+		printk(KERN_ERR "nvgpu: State tracking error for fast reverse lookups. Have unattached dmabuf!\n");
+		return ERR_PTR(-ENOTRECOVERABLE);
+	}
+
+	mapped_buffer = container_of(priv, struct nvgpu_mapped_buf, os_priv);
+	if (unlikely(mapped_buffer->os_priv.dmabuf != dmabuf)) {
+		printk(KERN_ERR "nvgpu: dmabuf_to_mapped_buf mapping inconsistent! BUG!\n");
+		return ERR_PTR(-ENOTRECOVERABLE);
+	}
+	if (!list_is_singular(&priv->nvmap_priv_entry)) {
+		printk(KERN_WARNING "nvgpu: Requesting paging on memory with multiple mappings! Aborting...\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+	return mapped_buffer;
+}
+
+int copy_all(struct vm_gk20a *vm) {
+	struct nvgpu_rbtree_node *node;
+	struct nvgpu_mapped_buf *m;
+
+	for_each_buffer(node, vm->mapped_buffers, m) {
+		// TODO
+		continue;
+	}
+	return 0;
+}
+
+int copy_out(struct sg_table *sgt) {
+	return copy(sgt, REQ_OP_WRITE);
+}
+
+int copy_in(struct sg_table *sgt) {
+	return copy(sgt, REQ_OP_READ);
+}
+
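
A note on copy()'s sector arithmetic: bios address 512-byte sectors, so each scatterlist element advances the on-disk position by sg_dma_len(sg) >> 9 sectors. A worked example under the patch's 4 KiB small-page assumption:

    /* 4096-byte page: 4096 >> 9 = 8 sectors per page, so a 64 KiB SGL
     * element advances `sector` by 65536 >> 9 = 128. With sector 0
     * hardcoded, every swapped buffer starts at the beginning of
     * /dev/nvme0n1, which is acceptable for single-buffer testing only. */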
diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c
index 8956cce5..fcb58ac4 100644
--- a/drivers/gpu/nvgpu/os/linux/vm.c
+++ b/drivers/gpu/nvgpu/os/linux/vm.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/dma-buf.h>
+#include <linux/nvmap.h>
 #include <linux/scatterlist.h>
 #include <uapi/linux/nvgpu.h>
 
@@ -71,7 +72,23 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
 {
 	struct nvgpu_rbtree_node *node = NULL;
 	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+	struct list_head *nvmap_priv;
+
+	// Try fast lookup first
+	if (!IS_ERR(nvmap_priv = nvmap_get_priv_list(dmabuf))) {
+		struct nvgpu_mapped_buf *mapped_buffer;
+		struct nvgpu_mapped_buf_priv *priv;
+
+		list_for_each_entry(priv, nvmap_priv, nvmap_priv_entry) {
+			mapped_buffer = container_of(priv, struct nvgpu_mapped_buf, os_priv);
+			if (mapped_buffer->os_priv.dmabuf == dmabuf &&
+			    mapped_buffer->kind == kind)
+				return mapped_buffer;
+		}
+	}
 
+	// Fall back to a full traversal (not an nvmap buffer?)
+	printk(KERN_INFO "nvgpu: Fast reverse lookup failed!\n");
 	nvgpu_rbtree_enum_start(0, &node, root);
 
 	while (node) {
@@ -158,6 +175,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 	 */
 	gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment,
 			mapped_buffer->os_priv.sgt);
+	list_del(&mapped_buffer->os_priv.nvmap_priv_entry);
 	dma_buf_put(os_buf->dmabuf);
 
 	nvgpu_log(g, gpu_dbg_map,
@@ -198,6 +216,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	struct nvgpu_sgt *nvgpu_sgt = NULL;
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct dma_buf_attachment *attachment;
+	struct list_head *nvmap_priv;
 	int err = 0;
 
 	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
@@ -243,6 +262,12 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	mapped_buffer->os_priv.dmabuf = dmabuf;
 	mapped_buffer->os_priv.attachment = attachment;
 	mapped_buffer->os_priv.sgt = sgt;
+	nvmap_priv = nvmap_get_priv_list(dmabuf);
+	if (!IS_ERR(nvmap_priv))
+		list_add(&mapped_buffer->os_priv.nvmap_priv_entry, nvmap_priv);
+	else
+		// So we can always safely call list_del()
+		INIT_LIST_HEAD(&mapped_buffer->os_priv.nvmap_priv_entry);
 
 	*gpu_va = mapped_buffer->addr;
 	return 0;
@@ -353,6 +378,49 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
 	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
 			mapped_buffer->os_priv.attachment,
 			mapped_buffer->os_priv.sgt);
-
+	list_del(&mapped_buffer->os_priv.nvmap_priv_entry);
 	dma_buf_put(mapped_buffer->os_priv.dmabuf);
 }
+
+/**
+ * Given an nvgpu_mapped_buf m, map m->os_priv.sgt into m->addr.
+ * Very similar to nvgpu_vm_map_buffer(), except that this assumes all
+ * necessary PTEs and PDEs have been created. This merely updates the physical
+ * address(es) in the associated PTEs, leaving all other attributes unchanged.
+ *
+ * NOP if sgt is already mapped for addr.
+ *
+ * vm->gmmu_update_lock must be held.
+ *
+ * Caller is responsible for flushing the TLB and L2 caches.
+ */
+void nvgpu_vm_remap(struct nvgpu_mapped_buf *m)
+{
+	// TODO: Input validation
+	struct scatterlist *sg;
+	unsigned int i = 0;
+	u64 curr_vaddr = m->addr;
+
+	// For each element of the scatterlist
+	// (based off the for_each_sgtable_dma_sg() macro in newer kernels)
+	for_each_sg(m->os_priv.sgt->sgl, sg, m->os_priv.sgt->nents, i) {
+		unsigned int sg_off = 0;
+		// Keep mapping data at the next unmapped virtual address
+		// until each scatterlist element is entirely mapped
+		while (sg_off < sg_dma_len(sg)) {
+			int amt_mapped = __nvgpu_update_paddr(gk20a_from_vm(m->vm),
+					m->vm,
+					curr_vaddr,
+					sg_dma_address(sg) + sg_off);
+			if (amt_mapped < 0) {
+				printk(KERN_ERR "nvgpu: Error %d from __nvgpu_update_paddr() in nvgpu_vm_remap()! Had mapped %llu of %llu bytes.\n", amt_mapped, curr_vaddr - m->addr, m->size);
+				return;
+			}
+			curr_vaddr += amt_mapped;
+			sg_off += amt_mapped;
+		}
+	}
+	if (curr_vaddr != m->addr + m->size) {
+		printk(KERN_ERR "nvgpu: Mapped %llu bytes when %llu bytes expected! Expect page table corruption!\n", curr_vaddr - m->addr, m->size);
+	}
+}
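
To make nvgpu_vm_remap()'s accounting concrete, a worked example assuming 4 KiB small pages, so that each __nvgpu_update_paddr() call returns SZ_4K:

    /* A 128 KiB buffer backed by two 64 KiB SGL elements remaps as:
     *   element 0: 16 calls, sg_off stepping 0x0 -> 0x10000 in 4 KiB
     *              increments, curr_vaddr advancing in lockstep;
     *   element 1: 16 more calls; on exit curr_vaddr == m->addr + m->size,
     *              so the final consistency check passes. */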
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 873e787f..0138b720 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -2176,6 +2176,10 @@ struct nvgpu_as_get_sync_ro_map_args {
 	__u32 padding;
 };
 
+struct nvgpu_as_swap_buffer_args {
+	__u32 dmabuf_fd;	/* in */
+};
+
 #define NVGPU_AS_IOCTL_BIND_CHANNEL \
 	_IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args)
 #define NVGPU32_AS_IOCTL_ALLOC_SPACE \
2181#define NVGPU32_AS_IOCTL_ALLOC_SPACE \ 2185#define NVGPU32_AS_IOCTL_ALLOC_SPACE \
@@ -2198,9 +2202,13 @@ struct nvgpu_as_get_sync_ro_map_args {
 	_IOWR(NVGPU_AS_IOCTL_MAGIC, 11, struct nvgpu_as_map_buffer_batch_args)
 #define NVGPU_AS_IOCTL_GET_SYNC_RO_MAP \
 	_IOR(NVGPU_AS_IOCTL_MAGIC, 12, struct nvgpu_as_get_sync_ro_map_args)
+#define NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 13, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_READ_SWAP_BUFFER \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 14, struct nvgpu_as_swap_buffer_args)
 
 #define NVGPU_AS_IOCTL_LAST \
-	_IOC_NR(NVGPU_AS_IOCTL_GET_SYNC_RO_MAP)
+	_IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER)
 #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_as_map_buffer_ex_args)
 