From ee26a2842ca891d3ae8b1de1b066d29234fc0115 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Tue, 24 May 2022 21:11:59 -0400 Subject: gpu-paging: Initial working implementation Supports synchronous page out or in of a specific buffer. Includes fast reverse struct mapped_buf lookup. Requires initial set of changes to nvmap as well. --- drivers/gpu/nvgpu/common/mm/gmmu.c | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'drivers/gpu/nvgpu/common') diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 748e9f45..a04e501f 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -36,6 +36,9 @@ #include "gk20a/mm_gk20a.h" +// XXX: Shouldn't really be here! Needed for __nvgpu_update_paddr() +#include + #define __gmmu_dbg(g, attrs, fmt, args...) \ do { \ if (attrs->debug) { \ @@ -938,3 +941,54 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) return 0; } + +u64 pgsz_enum_to_bytes(int sz) { + if (sz == GMMU_PAGE_SIZE_SMALL) + return SZ_4K; + else + return SZ_64K; // Dangerous! Big pages may also be 128k. Should check ram_in_big_page_size... registers. +} + +// Caller is responsible for TLB/L2 flushing so that this can be called +// repeatedly with low overhead. +int __nvgpu_update_paddr(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u64 paddr) +{ + struct nvgpu_gmmu_pd *pd; + u32 pd_idx, pd_offs; + int err; + u32 pte[2]; // Safe for at least gv11b + struct nvgpu_gmmu_attrs attrs = { + .pgsz = 0, + }; +// u32 pte_orig[2]; + + // Get existing pte entry and location + err = __nvgpu_locate_pte(g, vm, &vm->pdb, + vaddr, 0, &attrs, + pte, &pd, &pd_idx, &pd_offs); + if (unlikely(err)) { + printk(KERN_ERR "nvgpu: Unable to find PTE for vaddr %llx in __nvgpu_update_paddr()\n", vaddr); + return err; + } + // TODO: Verify that the PTE is actually in SYSMEM +// pte_orig[0] = pte[0]; +// pte_orig[1] = pte[1]; + + // Following logic is borrowed from __update_pte() for gp10b+ + // TODO: Make this work for gk20a-gp10b! + // Zero-out the address field + pte[0] &= ~gmmu_new_pte_address_sys_f(~0 >> gmmu_new_pte_address_shift_v()); + pte[1] &= ~(~0U >> (24 + gmmu_new_pte_address_shift_v())); + // Write new address (upper and lower bits) + pte[0] |= gmmu_new_pte_address_sys_f(paddr >> gmmu_new_pte_address_shift_v()); + pte[1] |= paddr >> (24 + gmmu_new_pte_address_shift_v()); + // Commit to the page tables + pd_write(g, pd, pd_offs, pte[0]); + pd_write(g, pd, pd_offs + 1, pte[1]); + nvgpu_wmb(); // XXX: Is this needed? +// printk(KERN_INFO "nvgpu: Mapped vaddr %llx @ paddr %llx. %lluKb pg. [%08x, %08x]\n", vaddr, paddr, pgsz_enum_to_bytes(attrs.pgsz)/1024, pte[1], pte[0]); +// if (pte_orig[0] != pte[0] || pte_orig[1] != pte[1]) { +// printk(KERN_INFO "nvgpu: Updated PTE entry from {%x,%x} to {%x, %x}\n", pte_orig[0], pte_orig[1], pte[0], pte[1]); +// } + return pgsz_enum_to_bytes(attrs.pgsz); +} -- cgit v1.2.2