From ee26a2842ca891d3ae8b1de1b066d29234fc0115 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Tue, 24 May 2022 21:11:59 -0400 Subject: gpu-paging: Initial working implementation Supports synchronous page out or in of a specific buffer. Includes fast reverse struct mapped_buf lookup. Requires initial set of changes to nvmap as well. --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/common/mm/gmmu.c | 54 ++++++++ drivers/gpu/nvgpu/include/nvgpu/gmmu.h | 17 +++ drivers/gpu/nvgpu/include/nvgpu/linux/vm.h | 2 + drivers/gpu/nvgpu/include/nvgpu/vm.h | 2 + drivers/gpu/nvgpu/os/linux/dmabuf.c | 4 +- drivers/gpu/nvgpu/os/linux/ioctl_as.c | 197 +++++++++++++++++++++++++++++ drivers/gpu/nvgpu/os/linux/swap.h | 117 +++++++++++++++++ drivers/gpu/nvgpu/os/linux/vm.c | 70 +++++++++- include/uapi/linux/nvgpu.h | 10 +- 10 files changed, 471 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/nvgpu/os/linux/swap.h diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 8c5b92e1..c23c858a 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -9,6 +9,7 @@ ccflags-y += -I$(srctree.nvgpu-next)/drivers/gpu/nvgpu ccflags-y += -I$(srctree)/drivers/devfreq ccflags-y += -Wno-multichar +ccflags-y += -Wno-sign-compare ccflags-y += -Werror ccflags-y += -Wno-error=cpp ifeq ($(VERSION),4) diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 748e9f45..a04e501f 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -36,6 +36,9 @@ #include "gk20a/mm_gk20a.h" +// XXX: Shouldn't really be here! Needed for __nvgpu_update_paddr() +#include + #define __gmmu_dbg(g, attrs, fmt, args...) \ do { \ if (attrs->debug) { \ @@ -938,3 +941,54 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) return 0; } + +u64 pgsz_enum_to_bytes(int sz) { + if (sz == GMMU_PAGE_SIZE_SMALL) + return SZ_4K; + else + return SZ_64K; // Dangerous! Big pages may also be 128k. Should check ram_in_big_page_size... registers. +} + +// Caller is responsible for TLB/L2 flushing so that this can be called +// repeatedly with low overhead. +int __nvgpu_update_paddr(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u64 paddr) +{ + struct nvgpu_gmmu_pd *pd; + u32 pd_idx, pd_offs; + int err; + u32 pte[2]; // Safe for at least gv11b + struct nvgpu_gmmu_attrs attrs = { + .pgsz = 0, + }; +// u32 pte_orig[2]; + + // Get existing pte entry and location + err = __nvgpu_locate_pte(g, vm, &vm->pdb, + vaddr, 0, &attrs, + pte, &pd, &pd_idx, &pd_offs); + if (unlikely(err)) { + printk(KERN_ERR "nvgpu: Unable to find PTE for vaddr %llx in __nvgpu_update_paddr()\n", vaddr); + return err; + } + // TODO: Verify that the PTE is actually in SYSMEM +// pte_orig[0] = pte[0]; +// pte_orig[1] = pte[1]; + + // Following logic is borrowed from __update_pte() for gp10b+ + // TODO: Make this work for gk20a-gp10b! + // Zero-out the address field + pte[0] &= ~gmmu_new_pte_address_sys_f(~0 >> gmmu_new_pte_address_shift_v()); + pte[1] &= ~(~0U >> (24 + gmmu_new_pte_address_shift_v())); + // Write new address (upper and lower bits) + pte[0] |= gmmu_new_pte_address_sys_f(paddr >> gmmu_new_pte_address_shift_v()); + pte[1] |= paddr >> (24 + gmmu_new_pte_address_shift_v()); + // Commit to the page tables + pd_write(g, pd, pd_offs, pte[0]); + pd_write(g, pd, pd_offs + 1, pte[1]); + nvgpu_wmb(); // XXX: Is this needed? +// printk(KERN_INFO "nvgpu: Mapped vaddr %llx @ paddr %llx. %lluKb pg. 
[%08x, %08x]\n", vaddr, paddr, pgsz_enum_to_bytes(attrs.pgsz)/1024, pte[1], pte[0]); +// if (pte_orig[0] != pte[0] || pte_orig[1] != pte[1]) { +// printk(KERN_INFO "nvgpu: Updated PTE entry from {%x,%x} to {%x, %x}\n", pte_orig[0], pte_orig[1], pte[0], pte[1]); +// } + return pgsz_enum_to_bytes(attrs.pgsz); +} diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index 2fc0d44e..81f829ed 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -354,6 +354,23 @@ int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte); */ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte); +/** + * __nvgpu_update_paddr - Remap a virtual address to a new physical address + * + * @g - The GPU. + * @vm - VM to look in. + * @vaddr - GPU virtual address. + * @paddr - The new physical address to map to + * + * This function is a combination of __nvgpu_get_pte() and __nvgpu_set_pte(). + * It searches for an existing PTE associated with @vaddr, and then updates + * only the physical address pointed to in the PTE to @paddr. All other + * attributes/fields of the PTE are preserved. + * + * This function returns the number of bytes mapped on success and -EINVAL + * otherwise. + */ +int __nvgpu_update_paddr(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u64 paddr); /* * Internal debugging routines. Probably not something you want to use. diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h index 6f3beaa9..b86a428a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h @@ -49,6 +49,8 @@ struct nvgpu_mapped_buf_priv { struct dma_buf *dmabuf; struct dma_buf_attachment *attachment; struct sg_table *sgt; + // For fast reverse lookup (FD -> mapped_buf) + struct list_head nvmap_priv_entry; }; /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index 3867c745..f007d880 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -261,6 +261,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch, enum nvgpu_aperture aperture); +void nvgpu_vm_remap(struct nvgpu_mapped_buf *m); + void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset, struct vm_gk20a_mapping_batch *batch); diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.c b/drivers/gpu/nvgpu/os/linux/dmabuf.c index e8e33130..08f78ae6 100644 --- a/drivers/gpu/nvgpu/os/linux/dmabuf.c +++ b/drivers/gpu/nvgpu/os/linux/dmabuf.c @@ -124,8 +124,10 @@ void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); dma_addr_t dma_addr; - if (IS_ERR(priv) || !priv) + if (IS_ERR(priv) || !priv) { + printk(KERN_ERR "nvgpu: Unable to access priv in gk20a_mm_unpin()\n"); return; + } nvgpu_mutex_acquire(&priv->lock); WARN_ON(priv->sgt != sgt); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c index f0cec178..9708ea1a 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c @@ -32,6 +32,9 @@ #include "platform_gk20a.h" #include "ioctl_as.h" #include "os_linux.h" +#include // For nvmap_dmabuf_{d/r}ealloc() +#include "dmabuf.h" // struct dma_buf things for swapping +#include "swap.h" static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 
flags) { @@ -329,6 +332,192 @@ int gk20a_as_dev_release(struct inode *inode, struct file *filp) return gk20a_as_release_share(as_share); } +#define OLD_WALK 0 + +/* Access dmabuf associated with passed file descriptor, copy the associated + * pages to an NVME drive, unpin associated pages from DMA'able space, and free + * said pages for use by others. + * dmabuf is put in a deallocated state, and any GPU mappings will be + * invalidated. To restore the dmabuf, see nvgpu_as_ioctl_read_swap_buffer(). + */ +static int nvgpu_as_ioctl_write_swap_buffer( + struct gk20a_as_share *as_share, + struct nvgpu_as_swap_buffer_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + int err = 0; +#if OLD_WALK + struct nvgpu_rbtree_node *node; +#endif + struct nvgpu_mapped_buf *m; + struct sg_table *sgt; + struct vm_gk20a *vm = as_share->vm; + struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd); + + nvgpu_log_fn(g, " "); + + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + // Other code walking vm->mapped_buffers grabs this lock + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + +#if OLD_WALK + // Get mapped buffer corresponding to this dmabuf + // TODO: Error on buffer mapped >1 + for_each_buffer(node, vm->mapped_buffers, m) { + if (m->os_priv.dmabuf == dmabuf) + break; + } + // If failed search + if (!node || !m) { + // No mapped dmabuf associated with FD + err = -EBADFD; + goto out_put_unlock; + } +#else + m = dmabuf_to_mapped_buf(dmabuf); + // If failed search + if (IS_ERR(m)) { + // No mapped dmabuf associated with FD + err = -EBADFD; + goto out_put_unlock; + } +#endif + + // Disable an annoying custom out-of-tree "feature" of dma_buf which defers unmap + if (dma_buf_disable_lazy_unmapping(dev_from_vm(vm))) { + err = -ENOTRECOVERABLE; + goto out_put_unlock; + } + + // Flush dirty GPU L2 cache lines to DRAM + // (Assuming that NVMe DRAM acceses are uncached) + gk20a_mm_l2_flush(g, false); + + // Copy out (blocking) + err = copy_out(m->os_priv.sgt); + if (err) { + // Inaccessible swap device, etc + goto out_put_unlock; + } + + // Unpin needs to happen after copy out is done + // (No return value check as it's a void function) + gk20a_mm_unpin(dev_from_vm(vm), m->os_priv.dmabuf, + m->os_priv.attachment, m->os_priv.sgt); + + // Deallocate dmabuf's backing pages + // TODO: Fail early for these cases (where the dmabuf is mmaped, etc), + // before we do all the above (expensive) steps + err = nvmap_dealloc_dmabuf(dmabuf); + if (err) { + // Repin + sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf, + &m->os_priv.attachment); + m->os_priv.sgt = sgt; + goto out_put_unlock; + } + +out_put_unlock: + // Done with dmabuf, so release our ref to it + dma_buf_put(dmabuf); + nvgpu_mutex_release(&vm->update_gmmu_lock); + return err; +} + +// Undoes everything nvgpu_as_ioctl_write_swap_buffer() does +static int nvgpu_as_ioctl_read_swap_buffer( + struct gk20a_as_share *as_share, + struct nvgpu_as_swap_buffer_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + int err = 0; +#if OLD_WALK + struct nvgpu_rbtree_node *node; +#endif + struct nvgpu_mapped_buf *m; + struct sg_table *sgt; + struct vm_gk20a *vm = as_share->vm; + struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd); + + nvgpu_log_fn(g, " "); + + if (!dmabuf) + return -EBADF; + // Other code walking vm->mapped_buffers grabs this lock + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + +#if OLD_WALK + // Get mapped buffer corresponding to this dmabuf + // TODO: Error on buffer mapped >1 + for_each_buffer(node, vm->mapped_buffers, m) { + if 
(m->os_priv.dmabuf == dmabuf) + break; + } + // If failed search + if (!node || !m) { + // No mapped dmabuf associated with FD + err = -EBADFD; + goto out_put_unlock; + } +#else + m = dmabuf_to_mapped_buf(dmabuf); + // If failed search + if (IS_ERR(m)) { + // No mapped dmabuf associated with FD + err = -EBADFD; + goto out_put_unlock; + } +#endif + + // Reallocate space for this buffer + err = nvmap_realloc_dmabuf(dmabuf); + if (err) { + // Out of memory (?) + goto out_put_unlock; + } + + // Repin the buffer to DMA'able memory + sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf, + &m->os_priv.attachment); + if (IS_ERR(sgt)) { + // Rollback allocation + err = nvmap_dealloc_dmabuf(dmabuf); + if (err) + printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! Consider dmabuf FD %d to be in an inconsistent state!\n", err, args->dmabuf_fd); + err = PTR_ERR(sgt); + goto out_put_unlock; + } + // Do any bookeeping not done by gk20a_mm_pin() + m->os_priv.sgt = sgt; + + // Reload page contents from disk (blocking) + err = copy_in(sgt); + if (err) { + int err2; + // Rollback pinning and allocation + gk20a_mm_unpin(dev_from_vm(vm), m->os_priv.dmabuf, + m->os_priv.attachment, m->os_priv.sgt); + err2 = nvmap_dealloc_dmabuf(dmabuf); + if (err2) + printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! Consider dmabuf FD %d to be in an inconsistent state!\n", err2, args->dmabuf_fd); + // Inaccessible swap device, etc + goto out_put_unlock; + } + // Update GPU page tables (PT) to point to new allocation + nvgpu_vm_remap(m); + // Due to PT update, translation lookaside buffer needs clearing + g->ops.fb.tlb_invalidate(g, vm->pdb.mem); + // Invalidate L2 so that TLB refill does not load stale PT + gk20a_mm_l2_flush(g, true); + +out_put_unlock: + // Done with dmabuf, so release our ref to it + dma_buf_put(dmabuf); + nvgpu_mutex_release(&vm->update_gmmu_lock); + return err; +} long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -412,6 +601,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err = nvgpu_as_ioctl_get_sync_ro_map(as_share, (struct nvgpu_as_get_sync_ro_map_args *)buf); break; + case NVGPU_AS_IOCTL_READ_SWAP_BUFFER: + err = nvgpu_as_ioctl_read_swap_buffer(as_share, + (struct nvgpu_as_swap_buffer_args *)buf); + break; + case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER: + err = nvgpu_as_ioctl_write_swap_buffer(as_share, + (struct nvgpu_as_swap_buffer_args *)buf); + break; default: err = -ENOTTY; break; diff --git a/drivers/gpu/nvgpu/os/linux/swap.h b/drivers/gpu/nvgpu/os/linux/swap.h new file mode 100644 index 00000000..f762ba81 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/swap.h @@ -0,0 +1,117 @@ +#include +#include +//#include + +// Queue a command to copy out an SGT to disk +// TODO: Cache bdev +// TODO: Asynchronous I/O +// TODO: Don't hardcode sector 0 +int copy(struct sg_table *sgt, int op) { + unsigned int i; + struct scatterlist *sg; + struct bio *bio; + int err = 0; + int sg_cnt = sgt->nents; + struct bio *bio_orig; + sector_t sector = 0; // XXX: For testing + // Find and open the block device + struct block_device *bdev = blkdev_get_by_path("/dev/nvme0n1", FMODE_READ | FMODE_WRITE, copy); + if (unlikely(IS_ERR(bdev))) { + printk(KERN_WARNING "Unabled to find `nvme0`, err %ld!\n", PTR_ERR(bdev)); + return -ENODEV; + } + // Will never fail when allocating <= BIO_MAX_PAGES + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + bio_orig = 
bio; + bio->bi_bdev = bdev; // Switch to bio_set_dev(bdev) in newer kernels + bio->bi_iter.bi_sector = sector; + bio_set_op_attrs(bio, op, op == REQ_OP_WRITE ? WRITE_ODIRECT : 0);//REQ_SYNC); // XXX: Is REQ_SYNC necessary? + // Copy the scatter-gather table (sgt) into a block I/O vector (bio vec) + // bio_chain() approach borrowed from drivers/nvme/target/io-cmd.c:nvmet_execute_rw() + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + // On most iterations, this inner loop shouldn't happen at all. This loop + // conditional only triggers if we fill up the bio and are unable to map + // the full length of an SGL entry. + while (bio_add_page(bio, sg_page(sg), sg_dma_len(sg), sg->offset) != sg_dma_len(sg)) { + // Uh oh! We ran out of space in the bio. Allocate a new one and chain it... + struct bio *prev = bio; + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + bio->bi_bdev = bdev; // Switch to bio_set_dev(bdev) in newer kernels + bio->bi_iter.bi_sector = sector; + bio_set_op_attrs(bio, op, op == REQ_OP_WRITE ? WRITE_ODIRECT : 0); + bio_chain(bio, prev); + // Get the I/O started + submit_bio(prev); + // No need to call bio_put() as that's automatically managed for chained bios + } + sector += sg_dma_len(sg) >> 9; + sg_cnt--; + } + // Use blocking submit for now + // TODO: Switch to async via submit_bio(bio) + err = submit_bio_wait(bio); + + if (bio->bi_error && bio->bi_error != err) + printk(KERN_WARNING "nvgpu: bio->bi_error %d != return val from submit_bio_wait() %d\n", bio->bi_error, err); + +//out: + bio_put(bio_orig); // TODO: Move to completion handler + blkdev_put(bdev, FMODE_WRITE|FMODE_READ); + return err; +} + +// Patterned off how __nvgpu_vm_find_mapped_buf_reverse() works in vm.c +// Needs struct nvgpu_rbtree_node *node, struct nvgpu_rbtree_node *root, +// and struct nvgpu_mapped_buf *m. +// Steps until end of rbtree OR !m +#define for_each_buffer(node, root, m) \ + for (nvgpu_rbtree_enum_start(0, &node, root); \ + node && (uintptr_t)(m = mapped_buffer_from_rbtree_node(node)); \ + nvgpu_rbtree_enum_next(&node, node)) + +// New, fast replacement to looking through with the above macro to match +struct nvgpu_mapped_buf* dmabuf_to_mapped_buf(struct dma_buf *dmabuf) { + struct list_head *nvmap_priv = nvmap_get_priv_list(dmabuf); + struct nvgpu_mapped_buf *mapped_buffer; + struct nvgpu_mapped_buf_priv *priv; + + if (IS_ERR(nvmap_priv)) + return ERR_PTR(-EOPNOTSUPP); + + priv = list_first_entry_or_null(nvmap_priv, struct nvgpu_mapped_buf_priv, nvmap_priv_entry); + if (unlikely(!priv)) { + printk(KERN_ERR "nvgpu: State tracking error for fast reverse lookups. Have unattached dmabuf!"); + return ERR_PTR(-ENOTRECOVERABLE); + } + + mapped_buffer = container_of(priv, struct nvgpu_mapped_buf, os_priv); + if (unlikely(mapped_buffer->os_priv.dmabuf != dmabuf)) { + printk(KERN_ERR "nvgpu: dmabuf_to_mapped_buf mapping inconsistent! BUG!\n"); + return ERR_PTR(-ENOTRECOVERABLE); + } + if (!list_is_singular(&priv->nvmap_priv_entry)) { + printk(KERN_WARNING "nvgpu: Requesting paging on memory with multiple mappings! 
Aborting...\n"); + return ERR_PTR(-EOPNOTSUPP); + } + return mapped_buffer; +} + +int copy_all(struct vm_gk20a *vm) { + struct nvgpu_rbtree_node *node; + struct nvgpu_mapped_buf *m; + + for_each_buffer(node, vm->mapped_buffers, m) { + // TODO + continue; + } + return 0; +} + +int copy_out(struct sg_table *sgt) { + return copy(sgt, REQ_OP_WRITE); +} + +int copy_in(struct sg_table *sgt) { + return copy(sgt, REQ_OP_READ); +} + diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c index 8956cce5..fcb58ac4 100644 --- a/drivers/gpu/nvgpu/os/linux/vm.c +++ b/drivers/gpu/nvgpu/os/linux/vm.c @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -71,7 +72,23 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( { struct nvgpu_rbtree_node *node = NULL; struct nvgpu_rbtree_node *root = vm->mapped_buffers; + struct list_head* nvmap_priv; + + // Try fast lookup first + if (!IS_ERR(nvmap_priv = nvmap_get_priv_list(dmabuf))) { + struct nvgpu_mapped_buf *mapped_buffer; + struct nvgpu_mapped_buf_priv *priv; + + list_for_each_entry(priv, nvmap_priv, nvmap_priv_entry) { + mapped_buffer = container_of(priv, struct nvgpu_mapped_buf, os_priv); + if (mapped_buffer->os_priv.dmabuf == dmabuf && + mapped_buffer->kind == kind) + return mapped_buffer; + } + } + // Full traversal (not an nvmap buffer?) + printk(KERN_INFO "nvmap: Fast reverse lookup failed!"); nvgpu_rbtree_enum_start(0, &node, root); while (node) { @@ -158,6 +175,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, */ gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment, mapped_buffer->os_priv.sgt); + list_del(&mapped_buffer->os_priv.nvmap_priv_entry); dma_buf_put(os_buf->dmabuf); nvgpu_log(g, gpu_dbg_map, @@ -198,6 +216,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, struct nvgpu_sgt *nvgpu_sgt = NULL; struct nvgpu_mapped_buf *mapped_buffer = NULL; struct dma_buf_attachment *attachment; + struct list_head *nvmap_priv; int err = 0; sgt = gk20a_mm_pin(dev, dmabuf, &attachment); @@ -243,6 +262,12 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, mapped_buffer->os_priv.dmabuf = dmabuf; mapped_buffer->os_priv.attachment = attachment; mapped_buffer->os_priv.sgt = sgt; + nvmap_priv = nvmap_get_priv_list(dmabuf); + if (!IS_ERR(nvmap_priv)) + list_add(&mapped_buffer->os_priv.nvmap_priv_entry, nvmap_priv); + else + // So we can always safely call list_del() + INIT_LIST_HEAD(&mapped_buffer->os_priv.nvmap_priv_entry); *gpu_va = mapped_buffer->addr; return 0; @@ -353,6 +378,49 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer) gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf, mapped_buffer->os_priv.attachment, mapped_buffer->os_priv.sgt); - + list_del(&mapped_buffer->os_priv.nvmap_priv_entry); dma_buf_put(mapped_buffer->os_priv.dmabuf); } + +/** + * Given an nvgpu_mapped_buf m, map m->os_priv.sgt into m->addr + * Very similar to nvgpu_vm_map_buffer, except that this assumes all necessary + * PTEs and PDEs have been created. This merely updates the physical address(es) + * in the associated PTEs, leaving all other attributes unchanged. + * + * NOP if sgt is already mapped for addr. + * + * vm->gmmu_update_lock must be held. + * + * Caller is responsible for flushing the TLB and L2 caches. 
+ */ +void nvgpu_vm_remap(struct nvgpu_mapped_buf *m) +{ + // TODO: Input validation + struct scatterlist *sg; + unsigned int i = 0; + u64 curr_vaddr = m->addr; + + // For each element of the scatterlist + // (based off for_each_sgtable_dma_sg() macro in newer kernels) + for_each_sg(m->os_priv.sgt->sgl, sg, m->os_priv.sgt->nents, i) { + unsigned int sg_off = 0; + // Keep mapping data at the next unmapped virtual address + // until each scatterlist element is entirely mapped + while (sg_off < sg_dma_len(sg)) { + int amt_mapped = __nvgpu_update_paddr(gk20a_from_vm(m->vm), + m->vm, + curr_vaddr, + sg_dma_address(sg) + sg_off); + if (amt_mapped < 0) { + printk(KERN_ERR "nvgpu: Error %d from __nvgpu_update_paddr() in nvgpu_vm_remap()! Had mapped %llu of %llu bytes.\n", amt_mapped, curr_vaddr - m->addr, m->size); + return; + } + curr_vaddr += amt_mapped; + sg_off += amt_mapped; + } + } + if (curr_vaddr != m->addr + m->size) { + printk(KERN_ERR "nvgpu: Mapped %llu bytes when %llu bytes expected! Expect page table corruption!\n", curr_vaddr - m->addr, m->size); + } +} diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 873e787f..0138b720 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -2176,6 +2176,10 @@ struct nvgpu_as_get_sync_ro_map_args { __u32 padding; }; +struct nvgpu_as_swap_buffer_args { + __u32 dmabuf_fd; /* in */ +}; + #define NVGPU_AS_IOCTL_BIND_CHANNEL \ _IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args) #define NVGPU32_AS_IOCTL_ALLOC_SPACE \ @@ -2198,9 +2202,13 @@ struct nvgpu_as_get_sync_ro_map_args { _IOWR(NVGPU_AS_IOCTL_MAGIC, 11, struct nvgpu_as_map_buffer_batch_args) #define NVGPU_AS_IOCTL_GET_SYNC_RO_MAP \ _IOR(NVGPU_AS_IOCTL_MAGIC, 12, struct nvgpu_as_get_sync_ro_map_args) +#define NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER \ + _IOW(NVGPU_AS_IOCTL_MAGIC, 13, struct nvgpu_as_swap_buffer_args) +#define NVGPU_AS_IOCTL_READ_SWAP_BUFFER \ + _IOW(NVGPU_AS_IOCTL_MAGIC, 14, struct nvgpu_as_swap_buffer_args) #define NVGPU_AS_IOCTL_LAST \ - _IOC_NR(NVGPU_AS_IOCTL_GET_SYNC_RO_MAP) + _IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER) #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_as_map_buffer_ex_args) -- cgit v1.2.2
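
For reference, here is a minimal userspace sketch of driving the two ioctls this patch adds (NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER and NVGPU_AS_IOCTL_READ_SWAP_BUFFER). It is not part of the patch: it assumes `as_fd` is an already-open nvgpu address-space fd that the buffer behind `dmabuf_fd` has been mapped into, and that the updated uapi header is installed as <linux/nvgpu.h>.

/*
 * Hypothetical usage sketch, not from the patch. Assumes as_fd is an open
 * nvgpu address-space fd and dmabuf_fd refers to an nvmap dmabuf that is
 * currently mapped into that address space.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int swap_out(int as_fd, int dmabuf_fd)
{
	struct nvgpu_as_swap_buffer_args args = { .dmabuf_fd = dmabuf_fd };

	/* Blocks until the buffer contents are written to the swap device
	 * and the backing pages are unpinned and freed; the GPU mapping is
	 * invalid until the matching READ_SWAP_BUFFER call completes. */
	if (ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER, &args) < 0) {
		fprintf(stderr, "swap out failed: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}

static int swap_in(int as_fd, int dmabuf_fd)
{
	struct nvgpu_as_swap_buffer_args args = { .dmabuf_fd = dmabuf_fd };

	/* Reallocates backing pages, repins them for DMA, copies the saved
	 * contents back from disk, and repoints the GPU page tables at the
	 * new physical pages (TLB and L2 are flushed by the driver). */
	if (ioctl(as_fd, NVGPU_AS_IOCTL_READ_SWAP_BUFFER, &args) < 0) {
		fprintf(stderr, "swap in failed: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}

Both calls are synchronous in this initial implementation, matching the commit message; between swap-out and swap-in the caller must ensure no GPU work references the mapping, since the backing pages are gone and the PTEs still point at the old physical addresses.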