From ee26a2842ca891d3ae8b1de1b066d29234fc0115 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Tue, 24 May 2022 21:11:59 -0400
Subject: gpu-paging: Initial working implementation

Supports synchronous page-out or page-in of a specific buffer. Includes
a fast reverse struct nvgpu_mapped_buf lookup. Requires an initial set
of changes to nvmap as well.
---
 drivers/gpu/nvgpu/os/linux/vm.c | 70 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c
index 8956cce5..fcb58ac4 100644
--- a/drivers/gpu/nvgpu/os/linux/vm.c
+++ b/drivers/gpu/nvgpu/os/linux/vm.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/dma-buf.h>
+#include <linux/nvmap.h>
 #include <linux/scatterlist.h>
 #include <uapi/linux/nvgpu.h>
 
@@ -71,7 +72,23 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
 {
 	struct nvgpu_rbtree_node *node = NULL;
 	struct nvgpu_rbtree_node *root = vm->mapped_buffers;
+	struct list_head *nvmap_priv;
+
+	// Try fast lookup first
+	if (!IS_ERR(nvmap_priv = nvmap_get_priv_list(dmabuf))) {
+		struct nvgpu_mapped_buf *mapped_buffer;
+		struct nvgpu_mapped_buf_priv *priv;
+
+		list_for_each_entry(priv, nvmap_priv, nvmap_priv_entry) {
+			mapped_buffer = container_of(priv, struct nvgpu_mapped_buf, os_priv);
+			if (mapped_buffer->os_priv.dmabuf == dmabuf &&
+			    mapped_buffer->kind == kind)
+				return mapped_buffer;
+		}
+	}
 
+	// Fall back to a full traversal (not an nvmap buffer?)
+	printk(KERN_INFO "nvgpu: Fast reverse lookup failed!\n");
 	nvgpu_rbtree_enum_start(0, &node, root);
 
 	while (node) {
@@ -158,6 +175,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 	 */
 	gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment,
 		       mapped_buffer->os_priv.sgt);
+	list_del(&mapped_buffer->os_priv.nvmap_priv_entry);
 	dma_buf_put(os_buf->dmabuf);
 
 	nvgpu_log(g, gpu_dbg_map,
@@ -198,6 +216,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	struct nvgpu_sgt *nvgpu_sgt = NULL;
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct dma_buf_attachment *attachment;
+	struct list_head *nvmap_priv;
 	int err = 0;
 
 	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
@@ -243,6 +262,12 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	mapped_buffer->os_priv.dmabuf = dmabuf;
 	mapped_buffer->os_priv.attachment = attachment;
 	mapped_buffer->os_priv.sgt = sgt;
+	nvmap_priv = nvmap_get_priv_list(dmabuf);
+	if (!IS_ERR(nvmap_priv))
+		list_add(&mapped_buffer->os_priv.nvmap_priv_entry, nvmap_priv);
+	else
+		// So we can always safely call list_del()
+		INIT_LIST_HEAD(&mapped_buffer->os_priv.nvmap_priv_entry);
 
 	*gpu_va = mapped_buffer->addr;
 	return 0;
@@ -353,6 +378,49 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
 	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
 		       mapped_buffer->os_priv.attachment,
 		       mapped_buffer->os_priv.sgt);
-
+	list_del(&mapped_buffer->os_priv.nvmap_priv_entry);
 	dma_buf_put(mapped_buffer->os_priv.dmabuf);
 }
+
+/**
+ * Given an nvgpu_mapped_buf m, map m->os_priv.sgt into m->addr.
+ * Very similar to nvgpu_vm_map_buffer, except that this assumes all necessary
+ * PTEs and PDEs have been created. This merely updates the physical address(es)
+ * in the associated PTEs, leaving all other attributes unchanged.
+ *
+ * NOP if sgt is already mapped for addr.
+ *
+ * vm->gmmu_update_lock must be held.
+ *
+ * Caller is responsible for flushing the TLB and L2 caches.
+ */
+void nvgpu_vm_remap(struct nvgpu_mapped_buf *m)
+{
+	// TODO: Input validation
+	struct scatterlist *sg;
+	unsigned int i = 0;
+	u64 curr_vaddr = m->addr;
+
+	// For each element of the scatterlist
+	// (based off the for_each_sgtable_dma_sg() macro in newer kernels)
+	for_each_sg(m->os_priv.sgt->sgl, sg, m->os_priv.sgt->nents, i) {
+		unsigned int sg_off = 0;
+		// Keep mapping data at the next unmapped virtual address
+		// until each scatterlist element is entirely mapped
+		while (sg_off < sg_dma_len(sg)) {
+			int amt_mapped = __nvgpu_update_paddr(gk20a_from_vm(m->vm),
+							      m->vm,
+							      curr_vaddr,
+							      sg_dma_address(sg) + sg_off);
+			if (amt_mapped < 0) {
+				printk(KERN_ERR "nvgpu: Error %d from __nvgpu_update_paddr() in nvgpu_vm_remap()! Had mapped %llu of %llu bytes.\n", amt_mapped, curr_vaddr - m->addr, m->size);
+				return;
+			}
+			curr_vaddr += amt_mapped;
+			sg_off += amt_mapped;
+		}
+	}
+	if (curr_vaddr != m->addr + m->size) {
+		printk(KERN_ERR "nvgpu: Mapped %llu bytes when %llu bytes expected! Expect page table corruption!\n", curr_vaddr - m->addr, m->size);
+	}
+}
-- 
cgit v1.2.2
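
The "fast reverse lookup" above hinges on one pattern: embedding a list_head in each mapping's OS-private data so container_of() can recover the enclosing struct from a short per-dmabuf list, instead of enumerating the rbtree of every mapping in the VM. Below is a minimal, self-contained user-space sketch of that pattern; struct mapping, priv_entry, and find_mapping are hypothetical stand-ins for struct nvgpu_mapped_buf, os_priv.nvmap_priv_entry, and __nvgpu_vm_find_mapped_buf_reverse(), and the list plumbing is re-implemented so the sketch compiles outside the kernel.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct list_head {
	struct list_head *next, *prev;
};

/* Insert new right after head (same semantics as the kernel's list_add) */
static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

struct mapping {
	int dmabuf_id;			/* stands in for the struct dma_buf pointer */
	unsigned int kind;
	struct list_head priv_entry;	/* mirrors os_priv.nvmap_priv_entry */
};

/* Walk the short per-dmabuf list instead of the rbtree of all mappings */
static struct mapping *find_mapping(struct list_head *head, int id,
				    unsigned int kind)
{
	struct list_head *pos;

	for (pos = head->next; pos != head; pos = pos->next) {
		struct mapping *m = container_of(pos, struct mapping, priv_entry);

		if (m->dmabuf_id == id && m->kind == kind)
			return m;
	}
	return NULL;	/* caller falls back to the full rbtree walk */
}

int main(void)
{
	struct list_head head = { &head, &head };
	struct mapping a = { .dmabuf_id = 1, .kind = 0 };
	struct mapping b = { .dmabuf_id = 2, .kind = 0 };

	list_add(&a.priv_entry, &head);
	list_add(&b.priv_entry, &head);
	printf("found mapping for dmabuf %d\n",
	       find_mapping(&head, 2, 0)->dmabuf_id);
	return 0;
}

This layout trick is also why the patch can INIT_LIST_HEAD() the entry for non-nvmap buffers: deleting a self-linked node only rewrites that node's own pointers, so the unmap paths can call list_del() unconditionally.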
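nvgpu_vm_remap() itself is a cursor walk: for each physical segment, keep calling the PTE-update helper and advance both the virtual-address cursor and the in-segment offset by however many bytes the helper covered, then check that the cursor landed exactly at addr + size. Here is a rough user-space sketch of that loop under stated assumptions: struct seg and update_paddr() are hypothetical stand-ins for struct scatterlist and __nvgpu_update_paddr(), and each update is assumed to cover exactly one 4 KiB page (the real amount depends on the page size the PTE maps).

#include <stdio.h>

#define PAGE_SIZE 4096ULL

struct seg {
	unsigned long long dma_addr;	/* stands in for sg_dma_address() */
	unsigned long long len;		/* stands in for sg_dma_len() */
};

/* Pretend to rewrite the PTE backing vaddr; return the bytes it covered */
static long long update_paddr(unsigned long long vaddr, unsigned long long paddr)
{
	printf("PTE: va 0x%llx -> pa 0x%llx\n", vaddr, paddr);
	return PAGE_SIZE;
}

int main(void)
{
	/* Two discontiguous physical segments backing one 3-page buffer */
	struct seg segs[] = {
		{ 0x80000000ULL, 2 * PAGE_SIZE },
		{ 0x90000000ULL, 1 * PAGE_SIZE },
	};
	unsigned long long base = 0x100000000ULL;
	unsigned long long size = 3 * PAGE_SIZE;
	unsigned long long curr_vaddr = base;
	unsigned int i;

	for (i = 0; i < sizeof(segs) / sizeof(segs[0]); i++) {
		unsigned long long sg_off = 0;

		/* Advance until this segment is entirely mapped */
		while (sg_off < segs[i].len) {
			long long n = update_paddr(curr_vaddr,
						   segs[i].dma_addr + sg_off);
			if (n < 0)
				return 1;	/* the driver logs and bails out here */
			curr_vaddr += (unsigned long long)n;
			sg_off += (unsigned long long)n;
		}
	}
	/* Same consistency check as the end of nvgpu_vm_remap() */
	if (curr_vaddr != base + size)
		printf("mapped %llu of %llu bytes!\n", curr_vaddr - base, size);
	return 0;
}

The final check mirrors the one in the patch: if the cursor does not land exactly at base + size, the scatterlist and the GPU mapping disagree about the buffer's length, which in the real driver would mean stale or corrupt PTEs.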