diff options
author | Alex Waterman <alexw@nvidia.com> | 2017-06-28 20:30:46 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-07-12 10:44:47 -0400 |
commit | 90d388ebf8d2f9f9d08f6a5c0f638aa8339c1f24 (patch) | |
tree | 7966a3829615e25ebeddad3202a2e6f78b9beba6 /drivers/gpu/nvgpu/common/mm | |
parent | 3bc7e4aaddd2487ab65f66caa80cc0795b522fb6 (diff) |
gpu: nvgpu: Add get/set PTE routines
Add new routines for accessing and modifying PTEs in situ. They are:
__nvgpu_pte_words()
__nvgpu_get_pte()
__nvgpu_set_pte()
All the details of modifying a page table entry are handled within.
Note, however, that these routines will not build page tables. If a PTE
does not exist then said PTE will not be created. Instead -EINVAL will
be returned. But, keep in mind, a PTE marked as invalid still exists.
So this API can be used to mark an invalid PTE valid.
JIRA NVGPU-30
Change-Id: Ic8615f209a0c4eb6fa64af9abadcfb3b2c11ee73
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1510447
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 1f3519aa..2b579bdd 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -843,3 +843,136 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
843 | batch->need_tlb_invalidate = true; | 843 | batch->need_tlb_invalidate = true; |
844 | } | 844 | } |
845 | } | 845 | } |
846 | |||
847 | u32 __nvgpu_pte_words(struct gk20a *g) | ||
848 | { | ||
849 | const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K); | ||
850 | const struct gk20a_mmu_level *next_l; | ||
851 | |||
852 | /* | ||
853 | * Iterate to the bottom GMMU level - the PTE level. The levels array | ||
854 | * is always NULL terminated (by the update_entry function). | ||
855 | */ | ||
856 | do { | ||
857 | next_l = l + 1; | ||
858 | if (!next_l->update_entry) | ||
859 | break; | ||
860 | |||
861 | l++; | ||
862 | } while (true); | ||
863 | |||
864 | return (u32)(l->entry_size / sizeof(u32)); | ||
865 | } | ||
866 | |||
867 | /* | ||
868 | * Recursively walk the pages tables to find the PTE. | ||
869 | */ | ||
870 | static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm, | ||
871 | struct nvgpu_gmmu_pd *pd, | ||
872 | u64 vaddr, int lvl, | ||
873 | struct nvgpu_gmmu_attrs *attrs, | ||
874 | u32 *data, | ||
875 | struct nvgpu_gmmu_pd **pd_out, u32 *pd_idx_out, | ||
876 | u32 *pd_offs_out) | ||
877 | { | ||
878 | const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; | ||
879 | const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl + 1]; | ||
880 | u32 pd_idx = pd_index(l, vaddr, attrs); | ||
881 | u32 pte_base; | ||
882 | u32 pte_size; | ||
883 | u32 i; | ||
884 | |||
885 | /* | ||
886 | * If this isn't the final level (i.e there's a valid next level) | ||
887 | * then find the next level PD and recurse. | ||
888 | */ | ||
889 | if (next_l->update_entry) { | ||
890 | struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx; | ||
891 | |||
892 | /* Invalid entry! */ | ||
893 | if (!pd_next->mem) | ||
894 | return -EINVAL; | ||
895 | |||
896 | return __nvgpu_locate_pte(g, vm, pd_next, | ||
897 | vaddr, lvl + 1, attrs, | ||
898 | data, pd_out, pd_idx_out, | ||
899 | pd_offs_out); | ||
900 | } | ||
901 | |||
902 | if (!pd->mem) | ||
903 | return -EINVAL; | ||
904 | |||
905 | /* | ||
906 | * Take into account the real offset into the nvgpu_mem since the PD | ||
907 | * may be located at an offset other than 0 (due to PD packing). | ||
908 | */ | ||
909 | pte_base = (pd->mem_offs / sizeof(u32)) + | ||
910 | pd_offset_from_index(l, pd_idx); | ||
911 | pte_size = (u32)(l->entry_size / sizeof(u32)); | ||
912 | |||
913 | if (data) { | ||
914 | map_gmmu_pages(g, pd); | ||
915 | for (i = 0; i < pte_size; i++) | ||
916 | data[i] = nvgpu_mem_rd32(g, pd->mem, pte_base + i); | ||
917 | unmap_gmmu_pages(g, pd); | ||
918 | } | ||
919 | |||
920 | if (pd_out) | ||
921 | *pd_out = pd; | ||
922 | |||
923 | if (pd_idx_out) | ||
924 | *pd_idx_out = pd_idx; | ||
925 | |||
926 | if (pd_offs_out) | ||
927 | *pd_offs_out = pd_offset_from_index(l, pd_idx); | ||
928 | |||
929 | return 0; | ||
930 | } | ||
931 | |||
932 | int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) | ||
933 | { | ||
934 | struct nvgpu_gmmu_attrs attrs = { | ||
935 | .pgsz = 0, | ||
936 | }; | ||
937 | |||
938 | return __nvgpu_locate_pte(g, vm, &vm->pdb, | ||
939 | vaddr, 0, &attrs, | ||
940 | pte, NULL, NULL, NULL); | ||
941 | } | ||
942 | |||
943 | int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) | ||
944 | { | ||
945 | struct nvgpu_gmmu_pd *pd; | ||
946 | u32 pd_idx, pd_offs, pte_size, i; | ||
947 | int err; | ||
948 | struct nvgpu_gmmu_attrs attrs = { | ||
949 | .pgsz = 0, | ||
950 | }; | ||
951 | struct nvgpu_gmmu_attrs *attrs_ptr = &attrs; | ||
952 | |||
953 | err = __nvgpu_locate_pte(g, vm, &vm->pdb, | ||
954 | vaddr, 0, &attrs, | ||
955 | NULL, &pd, &pd_idx, &pd_offs); | ||
956 | if (err) | ||
957 | return err; | ||
958 | |||
959 | pte_size = __nvgpu_pte_words(g); | ||
960 | |||
961 | map_gmmu_pages(g, pd); | ||
962 | for (i = 0; i < pte_size; i++) { | ||
963 | pd_write(g, pd, pd_offs + i, pte[i]); | ||
964 | pte_dbg(g, attrs_ptr, | ||
965 | "PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]); | ||
966 | } | ||
967 | unmap_gmmu_pages(g, pd); | ||
968 | |||
969 | /* | ||
970 | * Ensures the pd_write()s are done. The pd_write() does not do this | ||
971 | * since generally there's lots of pd_write()s called one after another. | ||
972 | * There probably also needs to be a TLB invalidate as well but we leave | ||
973 | * that to the caller of this function. | ||
974 | */ | ||
975 | wmb(); | ||
976 | |||
977 | return 0; | ||
978 | } | ||