From ee26a2842ca891d3ae8b1de1b066d29234fc0115 Mon Sep 17 00:00:00 2001
From: Joshua Bakita <jbakita@cs.unc.edu>
Date: Tue, 24 May 2022 21:11:59 -0400
Subject: gpu-paging: Initial working implementation

Supports synchronously paging a specific buffer out or in.

Includes a fast reverse lookup of struct mapped_buf.

Requires an initial set of changes to nvmap as well.
---
 drivers/gpu/nvgpu/common/mm/gmmu.c | 54 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'drivers/gpu/nvgpu/common')

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 748e9f45..a04e501f 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -36,6 +36,9 @@
 
 #include "gk20a/mm_gk20a.h"
 
+// XXX: Shouldn't really be here! Needed for __nvgpu_update_paddr()
+#include <nvgpu/hw/gp10b/hw_gmmu_gp10b.h>
+
 #define __gmmu_dbg(g, attrs, fmt, args...)				\
 	do {								\
 		if (attrs->debug) {					\
@@ -938,3 +941,54 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
 
 	return 0;
 }
+
+u64 pgsz_enum_to_bytes(int sz)
+{
+	// Page-size enum to bytes. Dangerous! Any non-small size yields 64k, but big
+	// pages may also be 128k. Should check ram_in_big_page_size... registers.
+	return (sz == GMMU_PAGE_SIZE_SMALL) ? SZ_4K : SZ_64K;
+}
+
+/*
+ * Point the existing PTE for @vaddr in @vm at @paddr, leaving its other bits as
+ * read. Only the gp10b+ PTE layout is handled. The caller is responsible for
+ * TLB/L2 flushing, so that this can be called repeatedly with low overhead.
+ *
+ * Returns the page size in bytes, or the error code from __nvgpu_locate_pte().
+ */
+int __nvgpu_update_paddr(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u64 paddr)
+{
+	struct nvgpu_gmmu_pd *pd;
+	u32 pd_idx, pd_offs;
+	int err;
+	u32 pte[2]; // Safe for at least gv11b
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+	// Address bits kept in PTE word 1 (PTE[53:32]). NOTE(review): confirm per chip.
+	const u32 addr_hi_mask = 0x003fffffU;
+	const u32 addr_hi_shift = 24 + gmmu_new_pte_address_shift_v();
+
+	// Get existing pte entry and location
+	err = __nvgpu_locate_pte(g, vm, &vm->pdb,
+				 vaddr, 0, &attrs,
+				 pte, &pd, &pd_idx, &pd_offs);
+	if (unlikely(err)) {
+		printk(KERN_ERR "nvgpu: Unable to find PTE for vaddr %llx in __nvgpu_update_paddr()\n", vaddr);
+		return err;
+	}
+	// TODO: Verify that the PTE is actually in SYSMEM
+	// Borrowed from __update_pte() for gp10b+. TODO: Make this work for gk20a-gp10b!
+	// Zero-out the address field. The masks avoid right-shifting a negative int
+	// and shifting a u32 by 24 + shift bits, undefined once that reaches 32.
+	pte[0] &= ~gmmu_new_pte_address_sys_f(~0U);
+	pte[1] &= ~addr_hi_mask;
+	// Write new address (upper and lower bits)
+	pte[0] |= gmmu_new_pte_address_sys_f(paddr >> gmmu_new_pte_address_shift_v());
+	pte[1] |= (u32)(paddr >> addr_hi_shift) & addr_hi_mask;
+	// Commit to the page tables
+	pd_write(g, pd, pd_offs, pte[0]);
+	pd_write(g, pd, pd_offs + 1, pte[1]);
+	nvgpu_wmb(); // XXX: Is this needed?
+	return pgsz_enum_to_bytes(attrs.pgsz);
+}
-- 
cgit v1.2.2