From 90d388ebf8d2f9f9d08f6a5c0f638aa8339c1f24 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Wed, 28 Jun 2017 17:30:46 -0700
Subject: gpu: nvgpu: Add get/set PTE routines

Add new routines for accessing and modifying PTEs in situ. They are:

  __nvgpu_pte_words()
  __nvgpu_get_pte()
  __nvgpu_set_pte()

All the details of modifying a page table entry are handled within.

Note, however, that these routines will not build page tables. If a PTE
does not exist, it will not be created; -EINVAL is returned instead.
Keep in mind, though, that a PTE marked as invalid still exists, so this
API can be used to mark an invalid PTE valid.

JIRA NVGPU-30

Change-Id: Ic8615f209a0c4eb6fa64af9abadcfb3b2c11ee73
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master.nvidia.com/r/1510447
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Seema Khowala
Reviewed-by: Seema Khowala
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu
---
 drivers/gpu/nvgpu/common/mm/gmmu.c     | 133 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h |  50 +++++++++++++
 2 files changed, 183 insertions(+)

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 1f3519aa..2b579bdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -843,3 +843,136 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		batch->need_tlb_invalidate = true;
 	}
 }
+
+u32 __nvgpu_pte_words(struct gk20a *g)
+{
+	const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K);
+	const struct gk20a_mmu_level *next_l;
+
+	/*
+	 * Iterate to the bottom GMMU level - the PTE level. The levels array
+	 * is always terminated by a level whose update_entry function is
+	 * NULL.
+	 */
+	do {
+		next_l = l + 1;
+		if (!next_l->update_entry)
+			break;
+
+		l++;
+	} while (true);
+
+	return (u32)(l->entry_size / sizeof(u32));
+}
+
+/*
+ * Recursively walk the page tables to find the PTE.
+ */
+static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
+			      struct nvgpu_gmmu_pd *pd,
+			      u64 vaddr, int lvl,
+			      struct nvgpu_gmmu_attrs *attrs,
+			      u32 *data,
+			      struct nvgpu_gmmu_pd **pd_out, u32 *pd_idx_out,
+			      u32 *pd_offs_out)
+{
+	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
+	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl + 1];
+	u32 pd_idx = pd_index(l, vaddr, attrs);
+	u32 pte_base;
+	u32 pte_size;
+	u32 i;
+
+	/*
+	 * If this isn't the final level (i.e. there's a valid next level)
+	 * then find the next level PD and recurse.
+	 */
+	if (next_l->update_entry) {
+		struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx;
+
+		/* Invalid entry! */
+		if (!pd_next->mem)
+			return -EINVAL;
+
+		return __nvgpu_locate_pte(g, vm, pd_next,
+					  vaddr, lvl + 1, attrs,
+					  data, pd_out, pd_idx_out,
+					  pd_offs_out);
+	}
+
+	if (!pd->mem)
+		return -EINVAL;
+
+	/*
+	 * Take into account the real offset into the nvgpu_mem since the PD
+	 * may be located at an offset other than 0 (due to PD packing).
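+	 *
+	 * For example, with 4-byte words a PD at byte offset mem_offs into
+	 * its nvgpu_mem starts at word (mem_offs / 4); the PTE's words then
+	 * live pd_offset_from_index(l, pd_idx) words past that base.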
+	 */
+	pte_base = (pd->mem_offs / sizeof(u32)) +
+		pd_offset_from_index(l, pd_idx);
+	pte_size = (u32)(l->entry_size / sizeof(u32));
+
+	if (data) {
+		map_gmmu_pages(g, pd);
+		for (i = 0; i < pte_size; i++)
+			data[i] = nvgpu_mem_rd32(g, pd->mem, pte_base + i);
+		unmap_gmmu_pages(g, pd);
+	}
+
+	if (pd_out)
+		*pd_out = pd;
+
+	if (pd_idx_out)
+		*pd_idx_out = pd_idx;
+
+	if (pd_offs_out)
+		*pd_offs_out = pd_offset_from_index(l, pd_idx);
+
+	return 0;
+}
+
+int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
+{
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+
+	return __nvgpu_locate_pte(g, vm, &vm->pdb,
+				  vaddr, 0, &attrs,
+				  pte, NULL, NULL, NULL);
+}
+
+int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
+{
+	struct nvgpu_gmmu_pd *pd;
+	u32 pd_idx, pd_offs, pte_size, i;
+	int err;
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+	struct nvgpu_gmmu_attrs *attrs_ptr = &attrs;
+
+	err = __nvgpu_locate_pte(g, vm, &vm->pdb,
+				 vaddr, 0, &attrs,
+				 NULL, &pd, &pd_idx, &pd_offs);
+	if (err)
+		return err;
+
+	pte_size = __nvgpu_pte_words(g);
+
+	map_gmmu_pages(g, pd);
+	for (i = 0; i < pte_size; i++) {
+		pd_write(g, pd, pd_offs + i, pte[i]);
+		pte_dbg(g, attrs_ptr,
+			"PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]);
+	}
+	unmap_gmmu_pages(g, pd);
+
+	/*
+	 * Ensure the pd_write()s are done. pd_write() itself does not do
+	 * this, since there are generally many pd_write()s called one after
+	 * another. A TLB invalidate is probably needed as well, but that is
+	 * left to the caller of this function.
+	 */
+	wmb();
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 92e5eb5f..de129a5f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -273,6 +273,56 @@ static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
 	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
 }
 
+/**
+ * __nvgpu_pte_words - Compute the number of words in a PTE.
+ *
+ * @g - The GPU.
+ *
+ * This computes and returns the size, in 32-bit words, of a PTE for the
+ * passed chip.
+ */
+u32 __nvgpu_pte_words(struct gk20a *g);
+
+/**
+ * __nvgpu_get_pte - Get the contents of a PTE by virtual address
+ *
+ * @g     - The GPU.
+ * @vm    - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @pte   - [out] Set to the contents of the PTE.
+ *
+ * Find a PTE in the passed VM based on the passed GPU virtual address. This
+ * fills @pte with a copy of the contents of the PTE. @pte must be an array
+ * of u32s large enough to contain the PTE; the required size can be
+ * computed with __nvgpu_pte_words().
+ *
+ * If you wish to write to this PTE, you may modify @pte and then write it
+ * back with __nvgpu_set_pte().
+ *
+ * This function returns 0 if the PTE is found and -EINVAL otherwise.
+ */
+int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
+
+/**
+ * __nvgpu_set_pte - Set a PTE based on virtual address
+ *
+ * @g     - The GPU.
+ * @vm    - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @pte   - The contents of the PTE to write.
+ *
+ * Find a PTE and overwrite its contents with the data passed in @pte. If
+ * the PTE does not exist, no write happens; that is, this function will not
+ * fill out the page tables for you. The expectation is that the passed
+ * @vaddr has already been mapped and this call is just modifying the
+ * mapping (for instance, changing invalid to valid).
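+ *
+ * Note: this function orders its PTE writes with a wmb() but does not
+ * perform a TLB invalidate; that is left to the caller.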
+ *
+ * @pte must contain at least the required number of words for the PTE. See
+ * __nvgpu_pte_words().
+ *
+ * This function returns 0 on success and -EINVAL otherwise.
+ */
+int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
+
 /*
  * Internal debugging routines. Probably not something you want to use.
  */
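
For reference, here is a minimal usage sketch of the new API (not part of the
patch itself). It shows the read-modify-write pattern these routines are built
for: fetch a PTE, flip a bit in the copy, and write it back. The helper name
example_make_pte_valid and the PTE_VALID_BIT mask are hypothetical; real code
would use the chip-specific hw_gmmu accessors for the valid field.

static int example_make_pte_valid(struct gk20a *g, struct vm_gk20a *vm,
				  u64 vaddr)
{
	/* Hypothetical valid-bit mask; use the chip's hw_gmmu fields. */
	const u32 PTE_VALID_BIT = BIT(0);
	u32 pte[8]; /* Scratch PTE copy; see the size check below. */
	u32 words = __nvgpu_pte_words(g);
	int err;

	if (words > ARRAY_SIZE(pte))
		return -EINVAL;

	/* Returns -EINVAL if no PTE exists for this vaddr. */
	err = __nvgpu_get_pte(g, vm, vaddr, pte);
	if (err)
		return err;

	/* An invalid PTE still exists, so it can be marked valid. */
	pte[0] |= PTE_VALID_BIT;

	/* Write it back; any needed TLB invalidate is the caller's job. */
	return __nvgpu_set_pte(g, vm, vaddr, pte);
}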