From 90d388ebf8d2f9f9d08f6a5c0f638aa8339c1f24 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Wed, 28 Jun 2017 17:30:46 -0700
Subject: gpu: nvgpu: Add get/set PTE routines

Add new routines for accessing and modifying PTEs in situ. They are:

  __nvgpu_pte_words()
  __nvgpu_get_pte()
  __nvgpu_set_pte()

All the details of modifying a page table entry are handled within.

Note, however, that these routines will not build page tables. If a PTE
does not exist, it will not be created; -EINVAL is returned instead.
Keep in mind, though, that a PTE marked as invalid still exists, so this
API can be used to mark an invalid PTE valid.

JIRA NVGPU-30

Change-Id: Ic8615f209a0c4eb6fa64af9abadcfb3b2c11ee73
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master.nvidia.com/r/1510447
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Seema Khowala
Reviewed-by: Seema Khowala
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu
---
 drivers/gpu/nvgpu/common/mm/gmmu.c     | 133 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h |  50 +++++++++++++
 2 files changed, 183 insertions(+)

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 1f3519aa..2b579bdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -843,3 +843,136 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		batch->need_tlb_invalidate = true;
 	}
 }
+
+u32 __nvgpu_pte_words(struct gk20a *g)
+{
+	const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K);
+	const struct gk20a_mmu_level *next_l;
+
+	/*
+	 * Iterate to the bottom GMMU level - the PTE level. The levels array
+	 * is always terminated by a level whose update_entry function is
+	 * NULL.
+	 */
+	do {
+		next_l = l + 1;
+		if (!next_l->update_entry)
+			break;
+
+		l++;
+	} while (true);
+
+	return (u32)(l->entry_size / sizeof(u32));
+}
+
+/*
+ * Recursively walk the page tables to find the PTE.
+ */
+static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
+			      struct nvgpu_gmmu_pd *pd,
+			      u64 vaddr, int lvl,
+			      struct nvgpu_gmmu_attrs *attrs,
+			      u32 *data,
+			      struct nvgpu_gmmu_pd **pd_out, u32 *pd_idx_out,
+			      u32 *pd_offs_out)
+{
+	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
+	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl + 1];
+	u32 pd_idx = pd_index(l, vaddr, attrs);
+	u32 pte_base;
+	u32 pte_size;
+	u32 i;
+
+	/*
+	 * If this isn't the final level (i.e. there's a valid next level)
+	 * then find the next level PD and recurse.
+	 */
+	if (next_l->update_entry) {
+		struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx;
+
+		/* Invalid entry! */
+		if (!pd_next->mem)
+			return -EINVAL;
+
+		return __nvgpu_locate_pte(g, vm, pd_next,
+					  vaddr, lvl + 1, attrs,
+					  data, pd_out, pd_idx_out,
+					  pd_offs_out);
+	}
+
+	if (!pd->mem)
+		return -EINVAL;
+
+	/*
+	 * Take into account the real offset into the nvgpu_mem since the PD
+	 * may be located at an offset other than 0 (due to PD packing).
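+	 *
+	 * For example, with 4-byte words a PD at byte offset mem_offs into
+	 * its nvgpu_mem starts at word (mem_offs / 4); the PTE's words then
+	 * live pd_offset_from_index(l, pd_idx) words past that base.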
+	 */
+	pte_base = (pd->mem_offs / sizeof(u32)) +
+		pd_offset_from_index(l, pd_idx);
+	pte_size = (u32)(l->entry_size / sizeof(u32));
+
+	if (data) {
+		map_gmmu_pages(g, pd);
+		for (i = 0; i < pte_size; i++)
+			data[i] = nvgpu_mem_rd32(g, pd->mem, pte_base + i);
+		unmap_gmmu_pages(g, pd);
+	}
+
+	if (pd_out)
+		*pd_out = pd;
+
+	if (pd_idx_out)
+		*pd_idx_out = pd_idx;
+
+	if (pd_offs_out)
+		*pd_offs_out = pd_offset_from_index(l, pd_idx);
+
+	return 0;
+}
+
+int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
+{
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+
+	return __nvgpu_locate_pte(g, vm, &vm->pdb,
+				  vaddr, 0, &attrs,
+				  pte, NULL, NULL, NULL);
+}
+
+int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
+{
+	struct nvgpu_gmmu_pd *pd;
+	u32 pd_idx, pd_offs, pte_size, i;
+	int err;
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+	struct nvgpu_gmmu_attrs *attrs_ptr = &attrs;
+
+	err = __nvgpu_locate_pte(g, vm, &vm->pdb,
+				 vaddr, 0, &attrs,
+				 NULL, &pd, &pd_idx, &pd_offs);
+	if (err)
+		return err;
+
+	pte_size = __nvgpu_pte_words(g);
+
+	map_gmmu_pages(g, pd);
+	for (i = 0; i < pte_size; i++) {
+		pd_write(g, pd, pd_offs + i, pte[i]);
+		pte_dbg(g, attrs_ptr,
+			"PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]);
+	}
+	unmap_gmmu_pages(g, pd);
+
+	/*
+	 * Ensure the pd_write()s are done. pd_write() itself does not do
+	 * this, since there are generally many pd_write()s called one after
+	 * another. A TLB invalidate is probably needed as well, but that is
+	 * left to the caller of this function.
+	 */
+	wmb();
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 92e5eb5f..de129a5f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -273,6 +273,56 @@ static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
 	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
 }
 
+/**
+ * __nvgpu_pte_words - Compute the number of words in a PTE.
+ *
+ * @g - The GPU.
+ *
+ * This computes and returns the size, in 32-bit words, of a PTE for the
+ * passed chip.
+ */
+u32 __nvgpu_pte_words(struct gk20a *g);
+
+/**
+ * __nvgpu_get_pte - Get the contents of a PTE by virtual address
+ *
+ * @g     - The GPU.
+ * @vm    - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @pte   - [out] Set to the contents of the PTE.
+ *
+ * Find a PTE in the passed VM based on the passed GPU virtual address. This
+ * fills @pte with a copy of the contents of the PTE. @pte must be an array
+ * of u32s large enough to contain the PTE; the required size can be
+ * computed with __nvgpu_pte_words().
+ *
+ * If you wish to write to this PTE, you may modify @pte and then write it
+ * back with __nvgpu_set_pte().
+ *
+ * This function returns 0 if the PTE is found and -EINVAL otherwise.
+ */
+int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
+
+/**
+ * __nvgpu_set_pte - Set a PTE based on virtual address
+ *
+ * @g     - The GPU.
+ * @vm    - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @pte   - The contents of the PTE to write.
+ *
+ * Find a PTE and overwrite its contents with the data passed in @pte. If
+ * the PTE does not exist, no write happens; that is, this function will not
+ * fill out the page tables for you. The expectation is that the passed
+ * @vaddr has already been mapped and this call is just modifying the
+ * mapping (for instance, changing invalid to valid).
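+ *
+ * Note: this function orders its PTE writes with a wmb() but does not
+ * perform a TLB invalidate; that is left to the caller.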
+ *
+ * @pte must contain at least the required number of words for the PTE. See
+ * __nvgpu_pte_words().
+ *
+ * This function returns 0 on success and -EINVAL otherwise.
+ */
+int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
+
 /*
  * Internal debugging routines. Probably not something you want to use.
  */
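
For reference, here is a minimal usage sketch of the new API (not part of the
patch itself). It shows the read-modify-write pattern these routines are built
for: fetch a PTE, flip a bit in the copy, and write it back. The helper name
example_make_pte_valid and the PTE_VALID_BIT mask are hypothetical; real code
would use the chip-specific hw_gmmu accessors for the valid field.

static int example_make_pte_valid(struct gk20a *g, struct vm_gk20a *vm,
				  u64 vaddr)
{
	/* Hypothetical valid-bit mask; use the chip's hw_gmmu fields. */
	const u32 PTE_VALID_BIT = BIT(0);
	u32 pte[8]; /* Scratch PTE copy; see the size check below. */
	u32 words = __nvgpu_pte_words(g);
	int err;

	if (words > ARRAY_SIZE(pte))
		return -EINVAL;

	/* Returns -EINVAL if no PTE exists for this vaddr. */
	err = __nvgpu_get_pte(g, vm, vaddr, pte);
	if (err)
		return err;

	/* An invalid PTE still exists, so it can be marked valid. */
	pte[0] |= PTE_VALID_BIT;

	/* Write it back; any needed TLB invalidate is the caller's job. */
	return __nvgpu_set_pte(g, vm, vaddr, pte);
}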