author		Alex Waterman <alexw@nvidia.com>	2017-06-28 20:30:46 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-07-12 10:44:47 -0400
commit		90d388ebf8d2f9f9d08f6a5c0f638aa8339c1f24 (patch)
tree		7966a3829615e25ebeddad3202a2e6f78b9beba6 /drivers
parent		3bc7e4aaddd2487ab65f66caa80cc0795b522fb6 (diff)
gpu: nvgpu: Add get/set PTE routines
Add new routines for accessing and modifying PTEs in situ. They are:

  __nvgpu_pte_words()
  __nvgpu_get_pte()
  __nvgpu_set_pte()

All the details of modifying a page table entry are handled within.

Note, however, that these routines will not build page tables. If a PTE
does not exist then said PTE will not be created. Instead -EINVAL will be
returned. But, keep in mind, a PTE marked as invalid still exists. So this
API can be used to mark an invalid PTE valid.

JIRA NVGPU-30

Change-Id: Ic8615f209a0c4eb6fa64af9abadcfb3b2c11ee73
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1510447
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
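For illustration only, here is a minimal caller-side sketch of how the new routines might be used to flip an existing (but currently invalid) PTE to valid. It is not part of this change: MAX_PTE_WORDS and GMMU_PTE_VALID_BIT are hypothetical placeholders, since the real word count comes from __nvgpu_pte_words() at run time and the valid-bit layout is chip-specific.

/*
 * Usage sketch only - not part of this patch. MAX_PTE_WORDS and
 * GMMU_PTE_VALID_BIT are hypothetical placeholders; the real PTE word
 * count comes from __nvgpu_pte_words() and the valid bit is chip-specific.
 */
#define MAX_PTE_WORDS		8		/* assumed upper bound */
#define GMMU_PTE_VALID_BIT	BIT(0)		/* hypothetical valid bit */

static int example_mark_pte_valid(struct gk20a *g, struct vm_gk20a *vm,
				  u64 vaddr)
{
	u32 pte[MAX_PTE_WORDS];
	u32 words = __nvgpu_pte_words(g);
	int err;

	if (words > MAX_PTE_WORDS)
		return -EINVAL;

	/* Read the current PTE; returns -EINVAL if no PTE exists at vaddr. */
	err = __nvgpu_get_pte(g, vm, vaddr, pte);
	if (err)
		return err;

	/* Modify the local copy, then write it back in place. */
	pte[0] |= GMMU_PTE_VALID_BIT;

	return __nvgpu_set_pte(g, vm, vaddr, pte);
}

Note that any needed TLB invalidate remains the caller's responsibility after __nvgpu_set_pte() returns, as the patch itself points out.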
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/gpu/nvgpu/common/mm/gmmu.c	133
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/gmmu.h	50
2 files changed, 183 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 1f3519aa..2b579bdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -843,3 +843,136 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		batch->need_tlb_invalidate = true;
 	}
 }
+
+u32 __nvgpu_pte_words(struct gk20a *g)
+{
+	const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K);
+	const struct gk20a_mmu_level *next_l;
+
+	/*
+	 * Iterate to the bottom GMMU level - the PTE level. The levels array
+	 * is always terminated by a level whose update_entry function is
+	 * NULL.
+	 */
+	do {
+		next_l = l + 1;
+		if (!next_l->update_entry)
+			break;
+
+		l++;
+	} while (true);
+
+	return (u32)(l->entry_size / sizeof(u32));
+}
+
+/*
+ * Recursively walk the page tables to find the PTE.
+ */
+static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
+			      struct nvgpu_gmmu_pd *pd,
+			      u64 vaddr, int lvl,
+			      struct nvgpu_gmmu_attrs *attrs,
+			      u32 *data,
+			      struct nvgpu_gmmu_pd **pd_out, u32 *pd_idx_out,
+			      u32 *pd_offs_out)
+{
+	const struct gk20a_mmu_level *l      = &vm->mmu_levels[lvl];
+	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl + 1];
+	u32 pd_idx = pd_index(l, vaddr, attrs);
+	u32 pte_base;
+	u32 pte_size;
+	u32 i;
+
+	/*
+	 * If this isn't the final level (i.e. there's a valid next level)
+	 * then find the next level PD and recurse.
+	 */
+	if (next_l->update_entry) {
+		struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx;
+
+		/* Invalid entry! */
+		if (!pd_next->mem)
+			return -EINVAL;
+
+		return __nvgpu_locate_pte(g, vm, pd_next,
+					  vaddr, lvl + 1, attrs,
+					  data, pd_out, pd_idx_out,
+					  pd_offs_out);
+	}
+
+	if (!pd->mem)
+		return -EINVAL;
+
+	/*
+	 * Take into account the real offset into the nvgpu_mem since the PD
+	 * may be located at an offset other than 0 (due to PD packing).
+	 */
+	pte_base = (pd->mem_offs / sizeof(u32)) +
+		pd_offset_from_index(l, pd_idx);
+	pte_size = (u32)(l->entry_size / sizeof(u32));
+
+	if (data) {
+		map_gmmu_pages(g, pd);
+		for (i = 0; i < pte_size; i++)
+			data[i] = nvgpu_mem_rd32(g, pd->mem, pte_base + i);
+		unmap_gmmu_pages(g, pd);
+	}
+
+	if (pd_out)
+		*pd_out = pd;
+
+	if (pd_idx_out)
+		*pd_idx_out = pd_idx;
+
+	if (pd_offs_out)
+		*pd_offs_out = pd_offset_from_index(l, pd_idx);
+
+	return 0;
+}
+
+int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
+{
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+
+	return __nvgpu_locate_pte(g, vm, &vm->pdb,
+				  vaddr, 0, &attrs,
+				  pte, NULL, NULL, NULL);
+}
+
+int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
+{
+	struct nvgpu_gmmu_pd *pd;
+	u32 pd_idx, pd_offs, pte_size, i;
+	int err;
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+	struct nvgpu_gmmu_attrs *attrs_ptr = &attrs;
+
+	err = __nvgpu_locate_pte(g, vm, &vm->pdb,
+				 vaddr, 0, &attrs,
+				 NULL, &pd, &pd_idx, &pd_offs);
+	if (err)
+		return err;
+
+	pte_size = __nvgpu_pte_words(g);
+
+	map_gmmu_pages(g, pd);
+	for (i = 0; i < pte_size; i++) {
+		pd_write(g, pd, pd_offs + i, pte[i]);
+		pte_dbg(g, attrs_ptr,
+			"PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]);
+	}
+	unmap_gmmu_pages(g, pd);
+
+	/*
+	 * Ensure the pd_write()s are done. pd_write() itself does not do
+	 * this since generally there are many pd_write()s called one after
+	 * another. A TLB invalidate is probably also needed, but we leave
+	 * that to the caller of this function.
+	 */
+	wmb();
+
+	return 0;
+}
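
As the wmb() comment above notes, __nvgpu_set_pte() only orders the pd_write()s; any TLB invalidate is left to the caller. The sketch below (again, not part of this patch) shows one way a caller might size the PTE buffer at run time and then invalidate. The g->ops.fb.tlb_invalidate() call is an assumption about the surrounding HAL, and kcalloc()/kfree() are the standard kernel allocators.

/*
 * Caller-side sketch only - not part of this patch. Assumes the
 * g->ops.fb.tlb_invalidate() HAL op used elsewhere in nvgpu.
 */
static int example_rewrite_pte(struct gk20a *g, struct vm_gk20a *vm,
			       u64 vaddr, const u32 *new_pte)
{
	u32 words = __nvgpu_pte_words(g);
	u32 *pte;
	int err;

	pte = kcalloc(words, sizeof(*pte), GFP_KERNEL);
	if (!pte)
		return -ENOMEM;

	/* Copy the caller's fully formed PTE into a writable buffer. */
	memcpy(pte, new_pte, words * sizeof(*pte));

	err = __nvgpu_set_pte(g, vm, vaddr, pte);
	if (!err)
		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);	/* assumed HAL op */

	kfree(pte);
	return err;
}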
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 92e5eb5f..de129a5f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -273,6 +273,56 @@ static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
 	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
 }
 
+/**
+ * __nvgpu_pte_words - Compute number of words in a PTE.
+ *
+ * @g - The GPU.
+ *
+ * This computes and returns the number of 32-bit words in a PTE for the
+ * passed chip.
+ */
+u32 __nvgpu_pte_words(struct gk20a *g);
+
+/**
+ * __nvgpu_get_pte - Get the contents of a PTE by virtual address
+ *
+ * @g - The GPU.
+ * @vm - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @pte - [out] Set to the contents of the PTE.
+ *
+ * Find a PTE in the passed VM based on the passed GPU virtual address. This
+ * will fill @pte with a copy of the contents of the PTE. @pte must be an
+ * array of u32s large enough to contain the PTE. The required size can be
+ * computed using __nvgpu_pte_words().
+ *
+ * If you wish to write to this PTE then you may modify @pte and then use
+ * __nvgpu_set_pte().
+ *
+ * This function returns 0 if the PTE is found and -EINVAL otherwise.
+ */
+int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
+
+/**
+ * __nvgpu_set_pte - Set a PTE based on virtual address
+ *
+ * @g - The GPU.
+ * @vm - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @pte - The contents of the PTE to write.
+ *
+ * Find a PTE and overwrite its contents with the data passed in @pte. If the
+ * PTE does not exist then no writing will happen; that is, this function will
+ * not fill out the page tables for you. The expectation is that the passed
+ * @vaddr has already been mapped and this is just modifying the mapping (for
+ * instance, changing invalid to valid).
+ *
+ * @pte must contain at least the required number of words for the PTE. See
+ * __nvgpu_pte_words().
+ *
+ * This function returns 0 on success and -EINVAL otherwise.
+ */
+int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
+
 
 /*
  * Internal debugging routines. Probably not something you want to use.