From 9296adcd450143f02faf32fbda5b77dba3f03bc7 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Thu, 2 Jun 2022 14:29:53 -0400
Subject: gpu-paging: Allow for more than one buffer to be swapped at a time

This uses a very primitive linear disk sector allocation scheme. Sectors
are only reused when userspace resets assignment to 0 with an
NVGPU_AS_IOCTL_SWAP_RESET ioctl (which invalidates all current swap
buffers).

This sector assignment scheme is sufficient for use in a TimeWall-like
system, where all allocations are assumed to be static after task system
release. This is not suitable for a system with dynamic allocations,
unless userspace manually resets swap state regularly (benchmarks run a
reset at start, for example). Support for dynamic allocations is on the
backlog.

No significant speed impact. Benchmarks, 100 iters, after:
gpu_paging_speed, write:                 186.0ms +/- 3.51
gpu_paging_speed, read:                  162.7ms +/- 2.58
gpu_paging_overhead_speed, write start:   35.4ms +/- 4.47
gpu_paging_overhead_speed, write finish:   3.3ms +/- 0.18
gpu_paging_overhead_speed, read start:    69.8ms +/- 6.42
gpu_paging_overhead_speed, read finish:   43.2ms +/- 0.91
---
 drivers/gpu/nvgpu/include/nvgpu/linux/vm.h |  3 +++
 drivers/gpu/nvgpu/os/linux/ioctl_as.c      |  8 ++++++++
 drivers/gpu/nvgpu/os/linux/swap.h          | 31 ++++++++++++++++++++++++++----
 drivers/gpu/nvgpu/os/linux/vm.c            |  1 +
 4 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
index 85abce6f..4fa4242c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
@@ -51,7 +51,10 @@ struct nvgpu_mapped_buf_priv {
         struct sg_table *sgt;
         // For fast reverse lookup (FD -> mapped_buf)
         struct list_head nvmap_priv_entry;
+        // To allow waiting on swap I/O completion
         struct completion swap_io_done;
+        // Sector assignment for swapped-out data
+        sector_t swap_sector;
 };
 
 /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 6348bb2a..2bf8363a 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -672,6 +672,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                 err = nvgpu_as_dev_ioctl_swap(cmd, as_share,
                                 (struct nvgpu_as_swap_buffer_args *)buf);
                 break;
+        case NVGPU_AS_IOCTL_SWAP_RESET:
+                // On-disk sector assignment is linear currently, and needs to
+                // be reset to the start between task systems to avoid disk
+                // space exhaustion.
+                // TODO: Support garbage-collection- or callback-driven sector
+                // reclaiming rather than requiring manual reset.
+                atomic64_set(&nvgpu_swap_next_sector, 4);
+                break;
         default:
                 err = -ENOTTY;
                 break;
diff --git a/drivers/gpu/nvgpu/os/linux/swap.h b/drivers/gpu/nvgpu/os/linux/swap.h
index 1e986095..3a648b26 100644
--- a/drivers/gpu/nvgpu/os/linux/swap.h
+++ b/drivers/gpu/nvgpu/os/linux/swap.h
@@ -1,8 +1,12 @@
 #include 
 #include 
-//#include 
+#include  // For SECTOR_SHIFT
+
+// Next sector to assign a mapped_buf to. Skip first disk block
+atomic64_t nvgpu_swap_next_sector = {4};
 
 // Callback for completion of the I/O chain
+// TODO: Error checking and handling
 static void complete_swap_io(struct bio *bio) {
         struct nvgpu_mapped_buf *m = bio->bi_private;
         bio_put(bio);
@@ -11,21 +15,38 @@
 
 // Queue a command to copy out an SGT to disk
 // TODO: Cache bdev
-// TODO: Don't hardcode sector 0
-// TODO: Figure out if submit_bio() can fail, and what to do then
+// TODO: Track, allocate, and recycle individual swap buffers on disk instead
+// of only supporting a global reset
 int copy(struct sg_table *sgt, int op, struct nvgpu_mapped_buf *m) {
         unsigned int i;
         struct scatterlist *sg;
         struct bio *bio;
         int err = 0;
         int sg_cnt = sgt->nents;
-        sector_t sector = 0; // XXX: For testing
+        sector_t sector = m->os_priv.swap_sector;
         // Find and open the block device
         struct block_device *bdev = blkdev_get_by_path("/dev/nvme0n1", FMODE_READ | FMODE_WRITE, copy);
         if (unlikely(IS_ERR(bdev))) {
                 printk(KERN_WARNING "Unabled to find `nvme0`, err %ld!\n", PTR_ERR(bdev));
                 return -ENODEV;
         }
+        // Assign a sector on-disk (0 indicates unassigned, we start at 4)
+        if (sector == 0) {
+                // Read block device size in sectors, and fail if we'd use more than 1/3rd
+                // of the disk (to stay in SLC-emulation-mode).
+                // TODO: Issue NVMe DSM commands to try to manage this better? Read-only
+                // regions should be able to be moved to TLC safely, whereas other
+                // data should be kept in the SLC cache to reduce wear.
+                if (atomic64_read(&nvgpu_swap_next_sector) >= i_size_read(bdev->bd_inode)/3) {
+                        err = -ENOMEM;
+                        goto out_put;
+                }
+                // Hand out sectors sequentially, and statically
+                // TODO: Intelligent sector allocation
+                sector = atomic64_add_return(m->size >> SECTOR_SHIFT, &nvgpu_swap_next_sector);
+                sector -= (m->size >> SECTOR_SHIFT);
+                m->os_priv.swap_sector = sector;
+        }
         // Reset the .done variable in the completion
         reinit_completion(&m->os_priv.swap_io_done);
         // bio_alloc() will never fail when allocating <= BIO_MAX_PAGES
@@ -58,8 +79,10 @@ int copy(struct sg_table *sgt, int op, struct nvgpu_mapped_buf *m) {
         }
 
         // Async submit. Caller should wait_for_completion_io(&m->os_priv.swap_io_done);
+        // Does not fail. Error reported via completion handler.
         submit_bio(bio);
 
+out_put:
         // Release our block device handle
         blkdev_put(bdev, FMODE_WRITE | FMODE_READ); // Is this safe?
         return err;
diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c
index 9cd17981..a1c19a3a 100644
--- a/drivers/gpu/nvgpu/os/linux/vm.c
+++ b/drivers/gpu/nvgpu/os/linux/vm.c
@@ -269,6 +269,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
         else
                 // So we can always safely call list_del()
                 INIT_LIST_HEAD(&mapped_buffer->os_priv.nvmap_priv_entry);
+        mapped_buffer->os_priv.swap_sector = 0;
 
         *gpu_va = mapped_buffer->addr;
         return 0;
-- 
cgit v1.2.2
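
The reset ioctl above is the only way sectors are reclaimed, so a task system is
expected to issue NVGPU_AS_IOCTL_SWAP_RESET on the GPU address-space device
between task-system runs, before any buffer is swapped out again. The sketch
below is illustrative only and is not part of the patch: it assumes the ioctl is
defined with no payload (matching the handler above, which reads no argument),
that the nvgpu uapi header providing it is available to userspace as "nvgpu.h",
and that the address-space device node is /dev/nvhost-as-gpu as on typical
nvgpu/L4T systems.

// Hypothetical userspace reset between task systems. The header name, device
// node path, and no-argument ioctl definition are assumptions, not part of
// the patch; adjust to the actual uapi on the target system.
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "nvgpu.h"        // Assumed to define NVGPU_AS_IOCTL_SWAP_RESET

int main(void)
{
        int as_fd = open("/dev/nvhost-as-gpu", O_RDWR);

        if (as_fd < 0) {
                perror("open GPU address-space device");
                return 1;
        }
        // Rewind the linear sector allocator to its starting sector (4).
        // This invalidates every existing swap buffer, so it should only be
        // issued while no swapped-out data is still needed.
        if (ioctl(as_fd, NVGPU_AS_IOCTL_SWAP_RESET) < 0)
                perror("NVGPU_AS_IOCTL_SWAP_RESET");
        close(as_fd);
        return 0;
}

Because the in-kernel handler only performs an atomic64_set() on
nvgpu_swap_next_sector, the reset itself is cheap; the cost of the scheme is the
restriction noted in the commit message that allocations must be static between
resets.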