author		Alex Waterman <alexw@nvidia.com>	2017-06-28 20:30:46 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-07-12 10:44:47 -0400
commit		90d388ebf8d2f9f9d08f6a5c0f638aa8339c1f24 (patch)
tree		7966a3829615e25ebeddad3202a2e6f78b9beba6 /drivers
parent		3bc7e4aaddd2487ab65f66caa80cc0795b522fb6 (diff)
gpu: nvgpu: Add get/set PTE routines
Add new routines for accessing and modifying PTEs in situ. They are:

  __nvgpu_pte_words()
  __nvgpu_get_pte()
  __nvgpu_set_pte()

All the details of modifying a page table entry are handled within.

Note, however, that these routines will not build page tables. If a PTE
does not exist then said PTE will not be created. Instead -EINVAL will be
returned. But, keep in mind, a PTE marked as invalid still exists. So this
API can be used to mark an invalid PTE valid.

JIRA NVGPU-30

Change-Id: Ic8615f209a0c4eb6fa64af9abadcfb3b2c11ee73
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1510447
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
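For illustration only, here is a minimal caller-side sketch of how the new routines might be used to flip an existing (but currently invalid) PTE to valid. It is not part of this change: MAX_PTE_WORDS and GMMU_PTE_VALID_BIT are hypothetical placeholders, since the real word count comes from __nvgpu_pte_words() at run time and the valid-bit layout is chip-specific.

/*
 * Usage sketch only - not part of this patch. MAX_PTE_WORDS and
 * GMMU_PTE_VALID_BIT are hypothetical placeholders; the real PTE word
 * count comes from __nvgpu_pte_words() and the valid bit is chip-specific.
 */
#define MAX_PTE_WORDS		8		/* assumed upper bound */
#define GMMU_PTE_VALID_BIT	BIT(0)		/* hypothetical valid bit */

static int example_mark_pte_valid(struct gk20a *g, struct vm_gk20a *vm,
				  u64 vaddr)
{
	u32 pte[MAX_PTE_WORDS];
	u32 words = __nvgpu_pte_words(g);
	int err;

	if (words > MAX_PTE_WORDS)
		return -EINVAL;

	/* Read the current PTE; returns -EINVAL if no PTE exists at vaddr. */
	err = __nvgpu_get_pte(g, vm, vaddr, pte);
	if (err)
		return err;

	/* Modify the local copy, then write it back in place. */
	pte[0] |= GMMU_PTE_VALID_BIT;

	return __nvgpu_set_pte(g, vm, vaddr, pte);
}

Note that any needed TLB invalidate remains the caller's responsibility after __nvgpu_set_pte() returns, as the patch itself points out.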
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/gpu/nvgpu/common/mm/gmmu.c	133
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/gmmu.h	50
2 files changed, 183 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 1f3519aa..2b579bdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -843,3 +843,136 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		batch->need_tlb_invalidate = true;
 	}
 }
+
+u32 __nvgpu_pte_words(struct gk20a *g)
+{
+	const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K);
+	const struct gk20a_mmu_level *next_l;
+
+	/*
+	 * Iterate to the bottom GMMU level - the PTE level. The levels array
+	 * is always terminated by a level whose update_entry function is
+	 * NULL.
+	 */
+	do {
+		next_l = l + 1;
+		if (!next_l->update_entry)
+			break;
+
+		l++;
+	} while (true);
+
+	return (u32)(l->entry_size / sizeof(u32));
+}
+
+/*
+ * Recursively walk the page tables to find the PTE.
+ */
+static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
+			      struct nvgpu_gmmu_pd *pd,
+			      u64 vaddr, int lvl,
+			      struct nvgpu_gmmu_attrs *attrs,
+			      u32 *data,
+			      struct nvgpu_gmmu_pd **pd_out, u32 *pd_idx_out,
+			      u32 *pd_offs_out)
+{
+	const struct gk20a_mmu_level *l      = &vm->mmu_levels[lvl];
+	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl + 1];
+	u32 pd_idx = pd_index(l, vaddr, attrs);
+	u32 pte_base;
+	u32 pte_size;
+	u32 i;
+
+	/*
+	 * If this isn't the final level (i.e. there's a valid next level)
+	 * then find the next level PD and recurse.
+	 */
+	if (next_l->update_entry) {
+		struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx;
+
+		/* Invalid entry! */
+		if (!pd_next->mem)
+			return -EINVAL;
+
+		return __nvgpu_locate_pte(g, vm, pd_next,
+					  vaddr, lvl + 1, attrs,
+					  data, pd_out, pd_idx_out,
+					  pd_offs_out);
+	}
+
+	if (!pd->mem)
+		return -EINVAL;
+
+	/*
+	 * Take into account the real offset into the nvgpu_mem since the PD
+	 * may be located at an offset other than 0 (due to PD packing).
+	 */
+	pte_base = (pd->mem_offs / sizeof(u32)) +
+		pd_offset_from_index(l, pd_idx);
+	pte_size = (u32)(l->entry_size / sizeof(u32));
+
+	if (data) {
+		map_gmmu_pages(g, pd);
+		for (i = 0; i < pte_size; i++)
+			data[i] = nvgpu_mem_rd32(g, pd->mem, pte_base + i);
+		unmap_gmmu_pages(g, pd);
+	}
+
+	if (pd_out)
+		*pd_out = pd;
+
+	if (pd_idx_out)
+		*pd_idx_out = pd_idx;
+
+	if (pd_offs_out)
+		*pd_offs_out = pd_offset_from_index(l, pd_idx);
+
+	return 0;
+}
+
+int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
+{
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+
+	return __nvgpu_locate_pte(g, vm, &vm->pdb,
+				  vaddr, 0, &attrs,
+				  pte, NULL, NULL, NULL);
+}
+
+int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
+{
+	struct nvgpu_gmmu_pd *pd;
+	u32 pd_idx, pd_offs, pte_size, i;
+	int err;
+	struct nvgpu_gmmu_attrs attrs = {
+		.pgsz = 0,
+	};
+	struct nvgpu_gmmu_attrs *attrs_ptr = &attrs;
+
+	err = __nvgpu_locate_pte(g, vm, &vm->pdb,
+				 vaddr, 0, &attrs,
+				 NULL, &pd, &pd_idx, &pd_offs);
+	if (err)
+		return err;
+
+	pte_size = __nvgpu_pte_words(g);
+
+	map_gmmu_pages(g, pd);
+	for (i = 0; i < pte_size; i++) {
+		pd_write(g, pd, pd_offs + i, pte[i]);
+		pte_dbg(g, attrs_ptr,
+			"PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]);
+	}
+	unmap_gmmu_pages(g, pd);
+
+	/*
+	 * Ensure the pd_write()s are done. pd_write() itself does not do
+	 * this since generally there are many pd_write()s called one after
+	 * another. A TLB invalidate is probably also needed, but we leave
+	 * that to the caller of this function.
+	 */
+	wmb();
+
+	return 0;
+}
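
As the wmb() comment above notes, __nvgpu_set_pte() only orders the pd_write()s; any TLB invalidate is left to the caller. The sketch below (again, not part of this patch) shows one way a caller might size the PTE buffer at run time and then invalidate. The g->ops.fb.tlb_invalidate() call is an assumption about the surrounding HAL, and kcalloc()/kfree() are the standard kernel allocators.

/*
 * Caller-side sketch only - not part of this patch. Assumes the
 * g->ops.fb.tlb_invalidate() HAL op used elsewhere in nvgpu.
 */
static int example_rewrite_pte(struct gk20a *g, struct vm_gk20a *vm,
			       u64 vaddr, const u32 *new_pte)
{
	u32 words = __nvgpu_pte_words(g);
	u32 *pte;
	int err;

	pte = kcalloc(words, sizeof(*pte), GFP_KERNEL);
	if (!pte)
		return -ENOMEM;

	/* Copy the caller's fully formed PTE into a writable buffer. */
	memcpy(pte, new_pte, words * sizeof(*pte));

	err = __nvgpu_set_pte(g, vm, vaddr, pte);
	if (!err)
		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);	/* assumed HAL op */

	kfree(pte);
	return err;
}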
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 92e5eb5f..de129a5f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -273,6 +273,56 @@ static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
 	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
 }
 
+/**
+ * __nvgpu_pte_words - Compute number of words in a PTE.
+ *
+ * @g - The GPU.
+ *
+ * This computes and returns the number of 32-bit words in a PTE for the
+ * passed chip.
+ */
+u32 __nvgpu_pte_words(struct gk20a *g);
+
+/**
+ * __nvgpu_get_pte - Get the contents of a PTE by virtual address
+ *
+ * @g - The GPU.
+ * @vm - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @pte - [out] Set to the contents of the PTE.
+ *
+ * Find a PTE in the passed VM based on the passed GPU virtual address. This
+ * will fill @pte with a copy of the contents of the PTE. @pte must be an
+ * array of u32s large enough to contain the PTE. The required size can be
+ * computed using __nvgpu_pte_words().
+ *
+ * If you wish to write to this PTE then you may modify @pte and then use
+ * __nvgpu_set_pte().
+ *
+ * This function returns 0 if the PTE is found and -EINVAL otherwise.
+ */
+int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
+
+/**
+ * __nvgpu_set_pte - Set a PTE based on virtual address
+ *
+ * @g - The GPU.
+ * @vm - VM to look in.
+ * @vaddr - GPU virtual address.
+ * @pte - The contents of the PTE to write.
+ *
+ * Find a PTE and overwrite its contents with the data passed in @pte. If the
+ * PTE does not exist then no writing will happen; that is, this function will
+ * not fill out the page tables for you. The expectation is that the passed
+ * @vaddr has already been mapped and this is just modifying the mapping (for
+ * instance, changing invalid to valid).
+ *
+ * @pte must contain at least the required number of words for the PTE. See
+ * __nvgpu_pte_words().
+ *
+ * This function returns 0 on success and -EINVAL otherwise.
+ */
+int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte);
+
 
 /*
  * Internal debugging routines. Probably not something you want to use.