diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 133 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/gmmu.h | 50 |
2 files changed, 183 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 1f3519aa..2b579bdd 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -843,3 +843,136 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
843 | batch->need_tlb_invalidate = true; | 843 | batch->need_tlb_invalidate = true; |
844 | } | 844 | } |
845 | } | 845 | } |
846 | |||
847 | u32 __nvgpu_pte_words(struct gk20a *g) | ||
848 | { | ||
849 | const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K); | ||
850 | const struct gk20a_mmu_level *next_l; | ||
851 | |||
852 | /* | ||
853 | * Iterate to the bottom GMMU level - the PTE level. The levels array | ||
854 | * is always NULL terminated (by the update_entry function). | ||
855 | */ | ||
856 | do { | ||
857 | next_l = l + 1; | ||
858 | if (!next_l->update_entry) | ||
859 | break; | ||
860 | |||
861 | l++; | ||
862 | } while (true); | ||
863 | |||
864 | return (u32)(l->entry_size / sizeof(u32)); | ||
865 | } | ||
866 | |||
867 | /* | ||
868 | * Recursively walk the pages tables to find the PTE. | ||
869 | */ | ||
870 | static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm, | ||
871 | struct nvgpu_gmmu_pd *pd, | ||
872 | u64 vaddr, int lvl, | ||
873 | struct nvgpu_gmmu_attrs *attrs, | ||
874 | u32 *data, | ||
875 | struct nvgpu_gmmu_pd **pd_out, u32 *pd_idx_out, | ||
876 | u32 *pd_offs_out) | ||
877 | { | ||
878 | const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; | ||
879 | const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl + 1]; | ||
880 | u32 pd_idx = pd_index(l, vaddr, attrs); | ||
881 | u32 pte_base; | ||
882 | u32 pte_size; | ||
883 | u32 i; | ||
884 | |||
885 | /* | ||
886 | * If this isn't the final level (i.e there's a valid next level) | ||
887 | * then find the next level PD and recurse. | ||
888 | */ | ||
889 | if (next_l->update_entry) { | ||
890 | struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx; | ||
891 | |||
892 | /* Invalid entry! */ | ||
893 | if (!pd_next->mem) | ||
894 | return -EINVAL; | ||
895 | |||
896 | return __nvgpu_locate_pte(g, vm, pd_next, | ||
897 | vaddr, lvl + 1, attrs, | ||
898 | data, pd_out, pd_idx_out, | ||
899 | pd_offs_out); | ||
900 | } | ||
901 | |||
902 | if (!pd->mem) | ||
903 | return -EINVAL; | ||
904 | |||
905 | /* | ||
906 | * Take into account the real offset into the nvgpu_mem since the PD | ||
907 | * may be located at an offset other than 0 (due to PD packing). | ||
908 | */ | ||
909 | pte_base = (pd->mem_offs / sizeof(u32)) + | ||
910 | pd_offset_from_index(l, pd_idx); | ||
911 | pte_size = (u32)(l->entry_size / sizeof(u32)); | ||
912 | |||
913 | if (data) { | ||
914 | map_gmmu_pages(g, pd); | ||
915 | for (i = 0; i < pte_size; i++) | ||
916 | data[i] = nvgpu_mem_rd32(g, pd->mem, pte_base + i); | ||
917 | unmap_gmmu_pages(g, pd); | ||
918 | } | ||
919 | |||
920 | if (pd_out) | ||
921 | *pd_out = pd; | ||
922 | |||
923 | if (pd_idx_out) | ||
924 | *pd_idx_out = pd_idx; | ||
925 | |||
926 | if (pd_offs_out) | ||
927 | *pd_offs_out = pd_offset_from_index(l, pd_idx); | ||
928 | |||
929 | return 0; | ||
930 | } | ||
931 | |||
932 | int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) | ||
933 | { | ||
934 | struct nvgpu_gmmu_attrs attrs = { | ||
935 | .pgsz = 0, | ||
936 | }; | ||
937 | |||
938 | return __nvgpu_locate_pte(g, vm, &vm->pdb, | ||
939 | vaddr, 0, &attrs, | ||
940 | pte, NULL, NULL, NULL); | ||
941 | } | ||
942 | |||
943 | int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) | ||
944 | { | ||
945 | struct nvgpu_gmmu_pd *pd; | ||
946 | u32 pd_idx, pd_offs, pte_size, i; | ||
947 | int err; | ||
948 | struct nvgpu_gmmu_attrs attrs = { | ||
949 | .pgsz = 0, | ||
950 | }; | ||
951 | struct nvgpu_gmmu_attrs *attrs_ptr = &attrs; | ||
952 | |||
953 | err = __nvgpu_locate_pte(g, vm, &vm->pdb, | ||
954 | vaddr, 0, &attrs, | ||
955 | NULL, &pd, &pd_idx, &pd_offs); | ||
956 | if (err) | ||
957 | return err; | ||
958 | |||
959 | pte_size = __nvgpu_pte_words(g); | ||
960 | |||
961 | map_gmmu_pages(g, pd); | ||
962 | for (i = 0; i < pte_size; i++) { | ||
963 | pd_write(g, pd, pd_offs + i, pte[i]); | ||
964 | pte_dbg(g, attrs_ptr, | ||
965 | "PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]); | ||
966 | } | ||
967 | unmap_gmmu_pages(g, pd); | ||
968 | |||
969 | /* | ||
970 | * Ensures the pd_write()s are done. The pd_write() does not do this | ||
971 | * since generally there's lots of pd_write()s called one after another. | ||
972 | * There probably also needs to be a TLB invalidate as well but we leave | ||
973 | * that to the caller of this function. | ||
974 | */ | ||
975 | wmb(); | ||
976 | |||
977 | return 0; | ||
978 | } | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index 92e5eb5f..de129a5f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h | |||
@@ -273,6 +273,56 @@ static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd, | |||
273 | nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data); | 273 | nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data); |
274 | } | 274 | } |
275 | 275 | ||
276 | /** | ||
277 | * __nvgpu_pte_words - Compute number of words in a PTE. | ||
278 | * | ||
279 | * @g - The GPU. | ||
280 | * | ||
281 | * This computes and returns the size of a PTE for the passed chip. | ||
282 | */ | ||
283 | u32 __nvgpu_pte_words(struct gk20a *g); | ||
284 | |||
285 | /** | ||
286 | * __nvgpu_get_pte - Get the contents of a PTE by virtual address | ||
287 | * | ||
288 | * @g - The GPU. | ||
289 | * @vm - VM to look in. | ||
290 | * @vaddr - GPU virtual address. | ||
291 | * @pte - [out] Set to the contents of the PTE. | ||
292 | * | ||
293 | * Find a PTE in the passed VM based on the passed GPU virtual address. This | ||
294 | will fill @pte with a copy of the contents of the PTE. @pte must be an array of | ||
295 | * u32s large enough to contain the PTE. This can be computed using | ||
296 | * __nvgpu_pte_words(). | ||
297 | * | ||
298 | * If you wish to write to this PTE then you may modify @pte and then use | ||
299 | * __nvgpu_set_pte() to write it back. | ||
300 | * | ||
301 | * This function returns 0 if the PTE is found and -EINVAL otherwise. | ||
302 | */ | ||
303 | int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte); | ||
304 | |||
305 | /** | ||
306 | * __nvgpu_set_pte - Set a PTE based on virtual address | ||
307 | * | ||
308 | * @g - The GPU. | ||
309 | * @vm - VM to look in. | ||
310 | * @vaddr - GPU virtual address. | ||
311 | * @pte - The contents of the PTE to write. | ||
312 | * | ||
313 | * Find a PTE and overwrite the contents of that PTE with the passed in data | ||
314 | located in @pte. If the PTE does not exist then no writing will happen. That | ||
315 | is, this function will not fill out the page tables for you. The expectation | ||
316 | * is that the passed @vaddr has already been mapped and this is just modifying | ||
317 | * the mapping (for instance changing invalid to valid). | ||
318 | * | ||
319 | * @pte must contain at least the required words for the PTE. See | ||
320 | * __nvgpu_pte_words(). | ||
321 | * | ||
322 | * This function returns 0 on success and -EINVAL otherwise. | ||
323 | */ | ||
324 | int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte); | ||
325 | |||
276 | 326 | ||
277 | /* | 327 | /* |
278 | * Internal debugging routines. Probably not something you want to use. | 328 | * Internal debugging routines. Probably not something you want to use. |