From ff66847a00ac27d8d94b3664ec156a195dbf3676 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Wed, 25 May 2022 22:01:24 -0400
Subject: gpu-paging: Split swap in/out to prepare for async support.

---
 drivers/gpu/nvgpu/os/linux/ioctl_as.c | 268 ++++++++++++++++++++++++----------------
 include/uapi/linux/nvgpu.h            |  10 +-
 2 files changed, 175 insertions(+), 103 deletions(-)

diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 9708ea1a..af6cdb5b 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -332,75 +332,68 @@ int gk20a_as_dev_release(struct inode *inode, struct file *filp)
 	return gk20a_as_release_share(as_share);
 }
 
-#define OLD_WALK 0
 
 /* Access dmabuf associated with passed file descriptor, copy the associated
  * pages to an NVME drive, unpin associated pages from DMA'able space, and free
  * said pages for use by others.
  * dmabuf is put in a deallocated state, and any GPU mappings will be
  * invalidated. To restore the dmabuf, see nvgpu_as_ioctl_read_swap_buffer().
+ * ...
+ * Starts a swap-out operation by flushing the GPU L2 and starting I/O.
+ * vm->update_gmmu_lock /must/ already be held.
  */
 static int nvgpu_as_ioctl_write_swap_buffer(
-		struct gk20a_as_share *as_share,
-		struct nvgpu_as_swap_buffer_args *args)
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
 {
-	struct gk20a *g = gk20a_from_vm(as_share->vm);
 	int err = 0;
-#if OLD_WALK
-	struct nvgpu_rbtree_node *node;
-#endif
-	struct nvgpu_mapped_buf *m;
-	struct sg_table *sgt;
-	struct vm_gk20a *vm = as_share->vm;
-	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
 
 	nvgpu_log_fn(g, " ");
 
-	if (IS_ERR(dmabuf))
-		return PTR_ERR(dmabuf);
-
-	// Other code walking vm->mapped_buffers grabs this lock
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-#if OLD_WALK
-	// Get mapped buffer corresponding to this dmabuf
-	// TODO: Error on buffer mapped >1
-	for_each_buffer(node, vm->mapped_buffers, m) {
-		if (m->os_priv.dmabuf == dmabuf)
-			break;
-	}
-	// If failed search
-	if (!node || !m) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#else
-	m = dmabuf_to_mapped_buf(dmabuf);
-	// If failed search
-	if (IS_ERR(m)) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#endif
-
 	// Disable an annoying custom out-of-tree "feature" of dma_buf which defers unmap
 	if (dma_buf_disable_lazy_unmapping(dev_from_vm(vm))) {
 		err = -ENOTRECOVERABLE;
-		goto out_put_unlock;
+		goto out;
 	}
 
+	// TODO: Verify that we'll likely be able to free the pages later
+	// before we start the copy.
+
 	// Flush dirty GPU L2 cache lines to DRAM
 	// (Assuming that NVMe DRAM accesses are uncached)
 	gk20a_mm_l2_flush(g, false);
 
-	// Copy out (blocking)
+	// Copy out (blocking) TODO: non-blocking
+	// Could fail on inaccessible swap device, etc
 	err = copy_out(m->os_priv.sgt);
-	if (err) {
-		// Inaccessible swap device, etc
-		goto out_put_unlock;
-	}
+
+out:
+	return err;
+}
+
+/* Finish a swap-out operation by waiting on I/O to complete, then unpinning
+ * and freeing the pages.
+ *
+ * Note that this may fail even if nvgpu_as_ioctl_write_swap_buffer()
+ * succeeded if the user mmaps the buffer before finishing the swap-out.
+ *
+ * vm->update_gmmu_lock /must/ already be held.
+ */
+static int nvgpu_as_ioctl_write_swap_buffer_finish(
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
+{
+	struct sg_table *sgt;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	// Wait for the pages to get written out
+	//wait_for_completion_io(m->os_priv.swap_completion);
 
 	// Unpin needs to happen after copy out is done
 	// (No return value check as it's a void function)
@@ -416,67 +409,30 @@ static int nvgpu_as_ioctl_write_swap_buffer(
 		sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
 				   &m->os_priv.attachment);
 		m->os_priv.sgt = sgt;
-		goto out_put_unlock;
 	}
 
-out_put_unlock:
-	// Done with dmabuf, so release our ref to it
-	dma_buf_put(dmabuf);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
 	return err;
 }
 
-// Undoes everything nvgpu_as_ioctl_write_swap_buffer() does
+/* Starts a swap-in operation by allocating and pinning backing pages, and
+ * starting I/O.
+ * vm->update_gmmu_lock /must/ already be held.
+ */
 static int nvgpu_as_ioctl_read_swap_buffer(
-		struct gk20a_as_share *as_share,
-		struct nvgpu_as_swap_buffer_args *args)
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
 {
-	struct gk20a *g = gk20a_from_vm(as_share->vm);
-	int err = 0;
-#if OLD_WALK
-	struct nvgpu_rbtree_node *node;
-#endif
-	struct nvgpu_mapped_buf *m;
 	struct sg_table *sgt;
-	struct vm_gk20a *vm = as_share->vm;
-	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
+	int err = 0;
 
 	nvgpu_log_fn(g, " ");
 
-	if (!dmabuf)
-		return -EBADF;
-	// Other code walking vm->mapped_buffers grabs this lock
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-#if OLD_WALK
-	// Get mapped buffer corresponding to this dmabuf
-	// TODO: Error on buffer mapped >1
-	for_each_buffer(node, vm->mapped_buffers, m) {
-		if (m->os_priv.dmabuf == dmabuf)
-			break;
-	}
-	// If failed search
-	if (!node || !m) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#else
-	m = dmabuf_to_mapped_buf(dmabuf);
-	// If failed search
-	if (IS_ERR(m)) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#endif
-
 	// Reallocate space for this buffer
 	err = nvmap_realloc_dmabuf(dmabuf);
-	if (err) {
-		// Out of memory (?)
-		goto out_put_unlock;
-	}
+	if (err)
+		goto out; // Out of memory (?)
 
 	// Repin the buffer to DMA'able memory
 	sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
@@ -485,14 +441,15 @@ static int nvgpu_as_ioctl_read_swap_buffer(
 		// Rollback allocation
 		err = nvmap_dealloc_dmabuf(dmabuf);
 		if (err)
-			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! Consider dmabuf FD %d to be in an inconsistent state!\n", err, args->dmabuf_fd);
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! Consider dmabuf '%s' to be in an inconsistent state!\n", err, dmabuf->exp_name);
 		err = PTR_ERR(sgt);
-		goto out_put_unlock;
+		goto out;
 	}
 	// Do any bookkeeping not done by gk20a_mm_pin()
 	m->os_priv.sgt = sgt;
 
 	// Reload page contents from disk (blocking)
+	// TODO: non-blocking
 	err = copy_in(sgt);
 	if (err) {
 		int err2;
@@ -501,20 +458,126 @@ static int nvgpu_as_ioctl_read_swap_buffer(
 				m->os_priv.attachment, m->os_priv.sgt);
 		err2 = nvmap_dealloc_dmabuf(dmabuf);
 		if (err2)
-			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! Consider dmabuf FD %d to be in an inconsistent state!\n", err2, args->dmabuf_fd);
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! Consider dmabuf '%s' to be in an inconsistent state!\n", err2, dmabuf->exp_name);
 		// Inaccessible swap device, etc
-		goto out_put_unlock;
+		goto out;
 	}
+
+out:
+	return err;
+}
+
+/* Finish a swap-in operation by mapping the pages and waiting on I/O to
+ * complete.
+ */
+static int nvgpu_as_ioctl_read_swap_buffer_finish(
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
+{
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
 	// Update GPU page tables (PT) to point to new allocation
 	nvgpu_vm_remap(m);
 	// Due to PT update, translation lookaside buffer needs clearing
 	g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	// Invalidate L2 so that TLB refill does not load stale PT
 	gk20a_mm_l2_flush(g, true);
 
+	// Wait for read to complete if it hasn't yet
+	//wait_for_completion_io(m->os_priv.swap_completion);
+
+	return err;
+}
+
+#define NVGPU_SWAP_ALL -1
+
+/* All swap functions require some common boilerplate. This function serves as
+ * a common entrypoint for all swap functions by handling that boilerplate,
+ * which includes input validation and locking for all functions.
+ * @param cmd IOCTL command code
+ */
+static int nvgpu_as_dev_ioctl_swap(
+		unsigned int cmd,
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_swap_buffer_args *args) {
+	struct vm_gk20a *vm = as_share->vm;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *m;
+	struct dma_buf *dmabuf;
+	int err;
+
+	// Other code walking vm->mapped_buffers grabs this lock
+	// Note that we don't really need to do this before getting the dmabuf,
+	// but we do for now to limit code complexity.
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	// Grab dmabuf and mapped_buf (if necessary) depending on op type
+	if (args->dmabuf_fd != NVGPU_SWAP_ALL) {
+		// If not swapping out everything, get dmabuf, then mapped_buf
+		dmabuf = dma_buf_get(args->dmabuf_fd);
+		if (IS_ERR(dmabuf)) {
+			err = PTR_ERR(dmabuf);
+			goto out_unlock;
+		}
+		// Get mapped buffer corresponding to this dmabuf
+		m = dmabuf_to_mapped_buf(dmabuf);
+		// If failed search
+		if (IS_ERR(m)) {
+			// No mapped dmabuf associated with FD
+			err = -EBADFD;
+			goto out_put_unlock;
+		}
+	} else {
+		// When swapping everything, we get buffers by walking the
+		// mapped_buf rbtree and then use those to get the dmabuf.
+		// TODO
+		//dmabuf = mapped_buf_to_dmabuf(m);
+		err = -EBADFD; // Not yet supported
+		goto out_unlock;
+	}
+
+	// Run appropriate command
+	// XXX: Validate that buffer state is valid for the requested command.
+	switch (cmd) {
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER:
+		// Just a combo of the _ASYNC versions. Saves caller a lock,
+		// some lookups, and an extra syscall. Partially kept for
+		// legacy reasons.
+		err = nvgpu_as_ioctl_read_swap_buffer(dmabuf, m, vm, g);
+		if (err)
+			goto out_put_unlock;
+		err = nvgpu_as_ioctl_read_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC:
+		err = nvgpu_as_ioctl_read_swap_buffer(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_ioctl_read_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER:
+		// See comment on NVGPU_AS_IOCTL_READ_SWAP_BUFFER above
+		err = nvgpu_as_ioctl_write_swap_buffer(dmabuf, m, vm, g);
+		if (err)
+			goto out_put_unlock;
+		err = nvgpu_as_ioctl_write_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC:
+		err = nvgpu_as_ioctl_write_swap_buffer(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_ioctl_write_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	default:
+		err = -ENOTTY;
+		break;
+	}
 
 out_put_unlock:
 	// Done with dmabuf, so release our ref to it
 	dma_buf_put(dmabuf);
+out_unlock:
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 	return err;
 }
@@ -602,11 +665,12 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			(struct nvgpu_as_get_sync_ro_map_args *)buf);
 		break;
 	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER:
-		err = nvgpu_as_ioctl_read_swap_buffer(as_share,
-			(struct nvgpu_as_swap_buffer_args *)buf);
-		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC:
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH:
 	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER:
-		err = nvgpu_as_ioctl_write_swap_buffer(as_share,
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC:
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_dev_ioctl_swap(cmd, as_share,
 			(struct nvgpu_as_swap_buffer_args *)buf);
 		break;
 	default:
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 0138b720..b8ea59a1 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -2206,9 +2206,17 @@ struct nvgpu_as_swap_buffer_args {
 	_IOW(NVGPU_AS_IOCTL_MAGIC, 13, struct nvgpu_as_swap_buffer_args)
 #define NVGPU_AS_IOCTL_READ_SWAP_BUFFER \
 	_IOW(NVGPU_AS_IOCTL_MAGIC, 14, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 15, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 16, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 17, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 18, struct nvgpu_as_swap_buffer_args)
 
 #define NVGPU_AS_IOCTL_LAST \
-	_IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER)
+	_IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH)
 #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_as_map_buffer_ex_args)
-- 
cgit v1.2.2
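Note (after the signature, so outside the patch proper): as of this commit the I/O itself still blocks in copy_out()/copy_in() (see the TODOs), so the _ASYNC/_ASYNC_FINISH pairs only stage the interface for later overlap. Below is a minimal user-space sketch of how the swap-out pair is intended to be driven. It is illustrative only: the AS fd setup (e.g. via /dev/nvhost-as-gpu) and the prior mapping of the dmabuf into that address space are assumed; only the dmabuf_fd field of struct nvgpu_as_swap_buffer_args and the ioctl names are taken from the patch.

	/* Hedged sketch -- not part of this patch. */
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>

	static int swap_out_split(int as_fd, int dmabuf_fd)
	{
		struct nvgpu_as_swap_buffer_args args = {
			.dmabuf_fd = dmabuf_fd,	/* buffer already mapped in this AS */
		};

		/* Start swap-out: flushes GPU L2 and kicks off the copy. */
		if (ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC, &args))
			return -1;

		/* ...other CPU/GPU work can overlap the NVMe write here... */

		/* Wait for I/O, then unpin and free the backing pages. */
		return ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH, &args);
	}

Swap-in mirrors this with NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC and its _FINISH counterpart; the non-ASYNC ioctls remain as a one-syscall combination of both steps.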
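The commented-out wait_for_completion_io(m->os_priv.swap_completion) lines mark where the eventual waits belong. One plausible shape for that plumbing, assuming a 'struct completion swap_completion' field is later added to os_priv and copy_out()/copy_in() are converted to submit block I/O without waiting (every nvgpu_swap_* name here is hypothetical; only the completion and bio APIs are standard kernel interfaces):

	/* Hedged sketch -- swap_completion does not exist in this patch yet. */
	#include <linux/completion.h>
	#include <linux/bio.h>

	/* bi_end_io callback: runs in I/O completion context when the
	 * last sector has been transferred. */
	static void nvgpu_swap_io_done(struct bio *bio)
	{
		struct completion *done = bio->bi_private;

		complete(done);
		bio_put(bio);
	}

	/* Submission side, inside a non-blocking copy_out()/copy_in():
	 * start the transfer and return immediately. */
	static void nvgpu_swap_submit(struct bio *bio, struct completion *done)
	{
		reinit_completion(done);
		bio->bi_private = done;
		bio->bi_end_io = nvgpu_swap_io_done;
		submit_bio(bio);	/* I/O proceeds asynchronously */
	}

With something like this in place, the *_FINISH paths would simply call wait_for_completion_io(&m->os_priv.swap_completion) before unpinning (swap-out) or around the TLB/L2 maintenance (swap-in), matching where the commented-out calls sit in the patch.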