author    Joshua Bakita <jbakita@cs.unc.edu>  2022-05-25 22:01:24 -0400
committer Joshua Bakita <jbakita@cs.unc.edu>  2022-05-30 12:19:42 -0400
commit    ff66847a00ac27d8d94b3664ec156a195dbf3676 (patch)
tree      388ffb2866b1c81279253866349d4b63c30d7745
parent    ee26a2842ca891d3ae8b1de1b066d29234fc0115 (diff)
gpu-paging: Split swap in/out to prepare for async support.
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_as.c | 268
-rw-r--r--  include/uapi/linux/nvgpu.h            |  10
2 files changed, 175 insertions(+), 103 deletions(-)
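For context on how the split is meant to be used: each synchronous NVGPU_AS_IOCTL_{READ,WRITE}_SWAP_BUFFER call is now divided into an _ASYNC ioctl that starts the I/O and an _ASYNC_FINISH ioctl that completes it, so that once the copy itself becomes non-blocking (copy_out()/copy_in() are still blocking in this patch) a caller can overlap other work with the swap I/O. Below is a minimal userspace sketch of the swap-out path; the helper name and the assumption that `as_fd` is an already-open nvgpu address-space fd (with `buf_fd` the dmabuf backing a buffer mapped into that address space) are illustrative only, and error handling is abbreviated.

```c
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Hypothetical helper: start a swap-out, overlap other work with the
 * write-out I/O, then finish it. as_fd is an open nvgpu address-space fd;
 * buf_fd is the dmabuf backing a buffer mapped into that address space. */
int swap_out_overlapped(int as_fd, int buf_fd)
{
	struct nvgpu_as_swap_buffer_args args = { .dmabuf_fd = buf_fd };

	/* Flush GPU L2 and start writing the pages out. */
	if (ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC, &args))
		return -1;

	/* ...other work can run here while the copy-out is in flight... */

	/* Wait for the I/O, then unpin and free the backing pages. */
	return ioctl(as_fd, NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH, &args);
}
```

Swap-in mirrors this with NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC and ..._ASYNC_FINISH; the original non-_ASYNC ioctls remain available as a combined start-plus-finish call.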
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 9708ea1a..af6cdb5b 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -332,75 +332,68 @@ int gk20a_as_dev_release(struct inode *inode, struct file *filp)
 
 	return gk20a_as_release_share(as_share);
 }
-#define OLD_WALK 0
 
 /* Access dmabuf associated with passed file descriptor, copy the associated
  * pages to an NVME drive, unpin associated pages from DMA'able space, and free
  * said pages for use by others.
  * dmabuf is put in a deallocated state, and any GPU mappings will be
  * invalidated. To restore the dmabuf, see nvgpu_as_ioctl_read_swap_buffer().
+ * ...
+ * Starts a swap-out operation by flushing the GPU L2 and starting I/O.
+ * vm->update_gmmu_lock /must/ already be held.
  */
 static int nvgpu_as_ioctl_write_swap_buffer(
-		struct gk20a_as_share *as_share,
-		struct nvgpu_as_swap_buffer_args *args)
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
 {
-	struct gk20a *g = gk20a_from_vm(as_share->vm);
 	int err = 0;
-#if OLD_WALK
-	struct nvgpu_rbtree_node *node;
-#endif
-	struct nvgpu_mapped_buf *m;
-	struct sg_table *sgt;
-	struct vm_gk20a *vm = as_share->vm;
-	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
 
 	nvgpu_log_fn(g, " ");
 
-	if (IS_ERR(dmabuf))
-		return PTR_ERR(dmabuf);
-
-	// Other code walking vm->mapped_buffers grabs this lock
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-#if OLD_WALK
-	// Get mapped buffer corresponding to this dmabuf
-	// TODO: Error on buffer mapped >1
-	for_each_buffer(node, vm->mapped_buffers, m) {
-		if (m->os_priv.dmabuf == dmabuf)
-			break;
-	}
-	// If failed search
-	if (!node || !m) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#else
-	m = dmabuf_to_mapped_buf(dmabuf);
-	// If failed search
-	if (IS_ERR(m)) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#endif
-
 	// Disable an annoying custom out-of-tree "feature" of dma_buf which defers unmap
 	if (dma_buf_disable_lazy_unmapping(dev_from_vm(vm))) {
 		err = -ENOTRECOVERABLE;
-		goto out_put_unlock;
+		goto out;
 	}
 
+	// TODO: Verify that we'll likely be able to free the pages later
+	// before we start the copy.
+
 	// Flush dirty GPU L2 cache lines to DRAM
 	// (Assuming that NVMe DRAM acceses are uncached)
 	gk20a_mm_l2_flush(g, false);
 
-	// Copy out (blocking)
+	// Copy out (blocking) TODO: non-blocking
+	// Could fail on inaccessible swap device, etc
 	err = copy_out(m->os_priv.sgt);
-	if (err) {
-		// Inaccessible swap device, etc
-		goto out_put_unlock;
-	}
+
+out:
+	return err;
+}
+
+/* Finish a swap-out operation by waiting on I/O to complete, then unpinning
+ * and freeing the pages.
+ *
+ * Note that this may fail even if nvgpu_as_ioctl_write_swap_buffer()
+ * succeeded if the user mmaps the buffer before finishing the swap-out.
+ *
+ * vm->update_gmmu_lock /must/ already be held.
+ */
+static int nvgpu_as_ioctl_write_swap_buffer_finish(
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
+{
+	struct sg_table *sgt;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	// Wait for the pages to get written out
+	//wait_for_completion_io(m->os_priv.swap_completion);
 
 	// Unpin needs to happen after copy out is done
 	// (No return value check as it's a void function)
@@ -416,67 +409,30 @@ static int nvgpu_as_ioctl_write_swap_buffer(
 		sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
 				&m->os_priv.attachment);
 		m->os_priv.sgt = sgt;
-		goto out_put_unlock;
 	}
 
-out_put_unlock:
-	// Done with dmabuf, so release our ref to it
-	dma_buf_put(dmabuf);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
 	return err;
 }
 
-// Undoes everything nvgpu_as_ioctl_write_swap_buffer() does
+/* Starts a swap-in operation by allocating and pinning backing pages, and
+ * starting I/O.
+ * vm->update_gmmu_lock /must/ already be held.
+ */
 static int nvgpu_as_ioctl_read_swap_buffer(
-		struct gk20a_as_share *as_share,
-		struct nvgpu_as_swap_buffer_args *args)
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
 {
-	struct gk20a *g = gk20a_from_vm(as_share->vm);
-	int err = 0;
-#if OLD_WALK
-	struct nvgpu_rbtree_node *node;
-#endif
-	struct nvgpu_mapped_buf *m;
 	struct sg_table *sgt;
-	struct vm_gk20a *vm = as_share->vm;
-	struct dma_buf *dmabuf = dma_buf_get(args->dmabuf_fd);
+	int err = 0;
 
 	nvgpu_log_fn(g, " ");
 
-	if (!dmabuf)
-		return -EBADF;
-	// Other code walking vm->mapped_buffers grabs this lock
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-#if OLD_WALK
-	// Get mapped buffer corresponding to this dmabuf
-	// TODO: Error on buffer mapped >1
-	for_each_buffer(node, vm->mapped_buffers, m) {
-		if (m->os_priv.dmabuf == dmabuf)
-			break;
-	}
-	// If failed search
-	if (!node || !m) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#else
-	m = dmabuf_to_mapped_buf(dmabuf);
-	// If failed search
-	if (IS_ERR(m)) {
-		// No mapped dmabuf associated with FD
-		err = -EBADFD;
-		goto out_put_unlock;
-	}
-#endif
-
 	// Reallocate space for this buffer
 	err = nvmap_realloc_dmabuf(dmabuf);
-	if (err) {
-		// Out of memory (?)
-		goto out_put_unlock;
-	}
+	if (err)
+		goto out; // Out of memory (?)
 
 	// Repin the buffer to DMA'able memory
 	sgt = gk20a_mm_pin(dev_from_vm(vm), m->os_priv.dmabuf,
@@ -485,14 +441,15 @@ static int nvgpu_as_ioctl_read_swap_buffer(
 		// Rollback allocation
 		err = nvmap_dealloc_dmabuf(dmabuf);
 		if (err)
-			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! Consider dmabuf FD %d to be in an inconsistent state!\n", err, args->dmabuf_fd);
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in gk20a_mm_pin()! Consider dmabuf '%s' to be in an inconsistent state!\n", err, dmabuf->exp_name);
 		err = PTR_ERR(sgt);
-		goto out_put_unlock;
+		goto out;
 	}
 	// Do any bookeeping not done by gk20a_mm_pin()
 	m->os_priv.sgt = sgt;
 
 	// Reload page contents from disk (blocking)
+	// TODO: non-blocking
 	err = copy_in(sgt);
 	if (err) {
 		int err2;
@@ -501,20 +458,126 @@ static int nvgpu_as_ioctl_read_swap_buffer(
 				m->os_priv.attachment, m->os_priv.sgt);
 		err2 = nvmap_dealloc_dmabuf(dmabuf);
 		if (err2)
-			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! Consider dmabuf FD %d to be in an inconsistent state!\n", err2, args->dmabuf_fd);
+			printk(KERN_ERR "nvgpu: Error %d while rolling back dmabuf allocation state on error in copy_in()! Consider dmabuf '%s' to be in an inconsistent state!\n", err2, dmabuf->exp_name);
 		// Inaccessible swap device, etc
-		goto out_put_unlock;
+		goto out;
 	}
+
+out:
+	return err;
+}
+
+/* Finish a swap-in operation by mapping the pages and waiting on I/O to
+ * complete.
+ */
+static int nvgpu_as_ioctl_read_swap_buffer_finish(
+		struct dma_buf *dmabuf,
+		struct nvgpu_mapped_buf *m,
+		struct vm_gk20a *vm,
+		struct gk20a *g)
+{
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
 	// Update GPU page tables (PT) to point to new allocation
 	nvgpu_vm_remap(m);
 	// Due to PT update, translation lookaside buffer needs clearing
 	g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	// Invalidate L2 so that TLB refill does not load stale PT
 	gk20a_mm_l2_flush(g, true);
+	// Wait for read to complete if it hasn't yet
+	//wait_for_completion_io(m->os_priv.swap_completion);
+
+	return err;
+}
+
+#define NVGPU_SWAP_ALL -1
+
+/* All swap functions require some common boilerplate. This function serves as
+ * a common entrypoint for all swap functions by handling that boilerplate,
+ * which includes input validation and locking for all functions.
+ * @param cmd IOCTL command code
+ */
+static int nvgpu_as_dev_ioctl_swap(
+		unsigned int cmd,
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_swap_buffer_args *args) {
+	struct vm_gk20a *vm = as_share->vm;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *m;
+	struct dma_buf *dmabuf;
+	int err;
+	// Other code walking vm->mapped_buffers grabs this lock
+	// Note that we don't really need to do this before getting the dmabuf,
+	// but we do for now to limit code complexity.
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	// Grab dmabuf and mapped_buf (if necessary) depending on op type
+	if (args->dmabuf_fd != NVGPU_SWAP_ALL) {
+		// If not swapping out everything, get dmabuf, then mapped_buf
+		dmabuf = dma_buf_get(args->dmabuf_fd);
+		if (IS_ERR(dmabuf)) {
+			err = PTR_ERR(dmabuf);
+			goto out_unlock;
+		}
+		// Get mapped buffer corresponding to this dmabuf
+		m = dmabuf_to_mapped_buf(dmabuf);
+		// If failed search
+		if (IS_ERR(m)) {
+			// No mapped dmabuf associated with FD
+			err = -EBADFD;
+			goto out_put_unlock;
+		}
+	} else {
+		// When swapping everything, we get buffers by walking the
+		// mapped_buf rbtree and then use those to get the dmabuf.
+		// TODO
+		//dmabuf = mapped_buf_to_dmabuf(m);
+		err = -EBADFD; // Not yet supported
+		goto out_unlock;
+	}
+
+	// Run appropriate command
+	// XXX: Validate that buffer state is valid for the requested command.
+	switch (cmd) {
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER:
+		// Just a combo of the _ASYNC versions. Saves caller a lock,
+		// some lookups, and an extra syscall. Partially kept for
+		// legacy reasons.
+		err = nvgpu_as_ioctl_read_swap_buffer(dmabuf, m, vm, g);
+		if (err)
+			goto out_put_unlock;
+		err = nvgpu_as_ioctl_read_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC:
+		err = nvgpu_as_ioctl_read_swap_buffer(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_ioctl_read_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER:
+		// See comment on NVGPU_AS_IOCTL_READ_SWAP_BUFFER above
+		err = nvgpu_as_ioctl_write_swap_buffer(dmabuf, m, vm, g);
+		if (err)
+			goto out_put_unlock;
+		err = nvgpu_as_ioctl_write_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC:
+		err = nvgpu_as_ioctl_write_swap_buffer(dmabuf, m, vm, g);
+		break;
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_ioctl_write_swap_buffer_finish(dmabuf, m, vm, g);
+		break;
+	default:
+		err = -ENOTTY;
+		break;
+	}
 
 out_put_unlock:
 	// Done with dmabuf, so release our ref to it
 	dma_buf_put(dmabuf);
+out_unlock:
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 	return err;
 }
@@ -602,11 +665,12 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				(struct nvgpu_as_get_sync_ro_map_args *)buf);
 		break;
 	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER:
-		err = nvgpu_as_ioctl_read_swap_buffer(as_share,
-				(struct nvgpu_as_swap_buffer_args *)buf);
-		break;
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC:
+	case NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH:
 	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER:
-		err = nvgpu_as_ioctl_write_swap_buffer(as_share,
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC:
+	case NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH:
+		err = nvgpu_as_dev_ioctl_swap(cmd, as_share,
 				(struct nvgpu_as_swap_buffer_args *)buf);
 		break;
 	default:
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 0138b720..b8ea59a1 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -2206,9 +2206,17 @@ struct nvgpu_as_swap_buffer_args {
 	_IOW(NVGPU_AS_IOCTL_MAGIC, 13, struct nvgpu_as_swap_buffer_args)
 #define NVGPU_AS_IOCTL_READ_SWAP_BUFFER \
 	_IOW(NVGPU_AS_IOCTL_MAGIC, 14, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 15, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 16, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_WRITE_SWAP_BUFFER_ASYNC_FINISH \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 17, struct nvgpu_as_swap_buffer_args)
+#define NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH \
+	_IOW(NVGPU_AS_IOCTL_MAGIC, 18, struct nvgpu_as_swap_buffer_args)
 
 #define NVGPU_AS_IOCTL_LAST \
-	_IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER)
+	_IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH)
 #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_as_map_buffer_ex_args)
 
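On the uapi side, the four new commands take ioctl nrs 15 through 18, which is why NVGPU_AS_IOCTL_LAST now references NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH (nr 18) rather than NVGPU_AS_IOCTL_READ_SWAP_BUFFER (nr 14); LAST is conventionally used to bounds-check incoming command numbers in the ioctl handler. A small standalone sketch (not part of the patch) of what the macros report:

```c
#include <stdio.h>
#include <linux/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	/* _IOC_NR() extracts the command number from an encoded ioctl code. */
	printf("READ_SWAP_BUFFER nr              = %u\n",
	       _IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER));              /* 14 */
	printf("READ_SWAP_BUFFER_ASYNC_FINISH nr = %u\n",
	       _IOC_NR(NVGPU_AS_IOCTL_READ_SWAP_BUFFER_ASYNC_FINISH)); /* 18 */
	printf("NVGPU_AS_IOCTL_LAST              = %u\n",
	       NVGPU_AS_IOCTL_LAST);                                   /* now 18 */
	return 0;
}
```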