Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 521
1 file changed, 0 insertions(+), 521 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a1873a30..e7bcf6f0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -124,15 +124,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl)
  *
  */
 
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-                                   enum gmmu_pgsz_gk20a pgsz_idx,
-                                   struct sg_table *sgt, u64 buffer_offset,
-                                   u64 first_vaddr, u64 last_vaddr,
-                                   u8 kind_v, u32 ctag_offset, bool cacheable,
-                                   bool umapped_pte, int rw_flag,
-                                   bool sparse,
-                                   bool priv,
-                                   enum nvgpu_aperture aperture);
 static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
@@ -781,104 +772,6 @@ void gk20a_init_mm_ce_context(struct gk20a *g)
 #endif
 }
 
-static void free_gmmu_phys_pages(struct vm_gk20a *vm,
-                                 struct gk20a_mm_entry *entry)
-{
-        gk20a_dbg_fn("");
-
-        /* note: mem_desc slightly abused (wrt. free_gmmu_pages) */
-
-        free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size));
-        entry->mem.cpu_va = NULL;
-
-        sg_free_table(entry->mem.priv.sgt);
-        nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
-        entry->mem.priv.sgt = NULL;
-        entry->mem.size = 0;
-        entry->mem.aperture = APERTURE_INVALID;
-}
-
-static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry)
-{
-        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                         sg_phys(entry->mem.priv.sgt->sgl),
-                         entry->mem.priv.sgt->sgl->length);
-        return 0;
-}
-
-static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry)
-{
-        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                         sg_phys(entry->mem.priv.sgt->sgl),
-                         entry->mem.priv.sgt->sgl->length);
-}
-
-void free_gmmu_pages(struct vm_gk20a *vm,
-                     struct gk20a_mm_entry *entry)
-{
-        struct gk20a *g = gk20a_from_vm(vm);
-
-        gk20a_dbg_fn("");
-
-        if (!entry->mem.size)
-                return;
-
-        if (entry->woffset) /* fake shadow mem */
-                return;
-
-        if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-                free_gmmu_phys_pages(vm, entry);
-                return;
-        }
-
-        nvgpu_dma_free(g, &entry->mem);
-}
-
-int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-        gk20a_dbg_fn("");
-
-        if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
-                return map_gmmu_phys_pages(entry);
-
-        if (IS_ENABLED(CONFIG_ARM64)) {
-                if (entry->mem.aperture == APERTURE_VIDMEM)
-                        return 0;
-
-                FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                                 sg_phys(entry->mem.priv.sgt->sgl),
-                                 entry->mem.size);
-        } else {
-                int err = nvgpu_mem_begin(g, &entry->mem);
-
-                if (err)
-                        return err;
-        }
-
-        return 0;
-}
-
-void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-        gk20a_dbg_fn("");
-
-        if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-                unmap_gmmu_phys_pages(entry);
-                return;
-        }
-
-        if (IS_ENABLED(CONFIG_ARM64)) {
-                if (entry->mem.aperture == APERTURE_VIDMEM)
-                        return;
-
-                FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                                 sg_phys(entry->mem.priv.sgt->sgl),
-                                 entry->mem.size);
-        } else {
-                nvgpu_mem_end(g, &entry->mem);
-        }
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
         return vm->mmu_levels[0].lo_bit[0];
@@ -909,21 +802,6 @@ static u32 pte_from_index(u32 i)
         return i * gmmu_pte__size_v() / sizeof(u32);
 }
 
-u32 pte_index_from_vaddr(struct vm_gk20a *vm,
-                         u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
-{
-        u32 ret;
-        /* mask off pde part */
-        addr = addr & ((1ULL << gk20a_mm_pde_coverage_bit_count(vm)) - 1ULL);
-
-        /* shift over to get pte index. note assumption that pte index
-         * doesn't leak over into the high 32b */
-        ret = (u32)(addr >> ilog2(vm->gmmu_page_sizes[pgsz_idx]));
-
-        gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
-        return ret;
-}
-
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
                          struct nvgpu_mapped_buf ***mapped_buffers,
                          int *num_buffers)
@@ -1096,141 +974,6 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
         return 0;
 }
 
-u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
-                          u64 map_offset,
-                          struct sg_table *sgt,
-                          u64 buffer_offset,
-                          u64 size,
-                          int pgsz_idx,
-                          u8 kind_v,
-                          u32 ctag_offset,
-                          u32 flags,
-                          int rw_flag,
-                          bool clear_ctags,
-                          bool sparse,
-                          bool priv,
-                          struct vm_gk20a_mapping_batch *batch,
-                          enum nvgpu_aperture aperture)
-{
-        int err = 0;
-        bool allocated = false;
-        struct gk20a *g = gk20a_from_vm(vm);
-        int ctag_granularity = g->ops.fb.compression_page_size(g);
-        u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
-
-        /* Allocate (or validate when map_offset != 0) the virtual address. */
-        if (!map_offset) {
-                map_offset = __nvgpu_vm_alloc_va(vm, size,
-                                                 pgsz_idx);
-                if (!map_offset) {
-                        nvgpu_err(g, "failed to allocate va space");
-                        err = -ENOMEM;
-                        goto fail_alloc;
-                }
-                allocated = true;
-        }
-
-        gk20a_dbg(gpu_dbg_map,
-                  "gv: 0x%04x_%08x + 0x%-7llx "
-                  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
-                  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
-                  "kind=0x%x flags=0x%x apt=%s",
-                  u64_hi32(map_offset), u64_lo32(map_offset), size,
-                  sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0,
-                  sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0,
-                  sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0,
-                  sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0,
-                  vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm),
-                  ctag_lines, ctag_offset,
-                  kind_v, flags, nvgpu_aperture_str(aperture));
-
-        err = update_gmmu_ptes_locked(vm, pgsz_idx,
-                                      sgt,
-                                      buffer_offset,
-                                      map_offset, map_offset + size,
-                                      kind_v,
-                                      ctag_offset,
-                                      flags &
-                                      NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-                                      flags &
-                                      NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE,
-                                      rw_flag,
-                                      sparse,
-                                      priv,
-                                      aperture);
-        if (err) {
-                nvgpu_err(g, "failed to update ptes on map");
-                goto fail_validate;
-        }
-
-        if (!batch)
-                g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-        else
-                batch->need_tlb_invalidate = true;
-
-        return map_offset;
-fail_validate:
-        if (allocated)
-                __nvgpu_vm_free_va(vm, map_offset, pgsz_idx);
-fail_alloc:
-        nvgpu_err(g, "%s: failed with err=%d", __func__, err);
-        return 0;
-}
-
-void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
-                             u64 vaddr,
-                             u64 size,
-                             int pgsz_idx,
-                             bool va_allocated,
-                             int rw_flag,
-                             bool sparse,
-                             struct vm_gk20a_mapping_batch *batch)
-{
-        int err = 0;
-        struct gk20a *g = gk20a_from_vm(vm);
-
-        if (va_allocated) {
-                err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
-                if (err) {
-                        nvgpu_err(g, "failed to free va");
-                        return;
-                }
-        }
-
-        /* unmap here needs to know the page size we assigned at mapping */
-        err = update_gmmu_ptes_locked(vm,
-                                      pgsz_idx,
-                                      NULL, /* n/a for unmap */
-                                      0,
-                                      vaddr,
-                                      vaddr + size,
-                                      0, 0, false /* n/a for unmap */,
-                                      false, rw_flag,
-                                      sparse, 0,
-                                      APERTURE_INVALID); /* don't care for unmap */
-        if (err)
-                nvgpu_err(g, "failed to update gmmu ptes on unmap");
-
-        /* flush l2 so any dirty lines are written out *now*.
-         * also as we could potentially be switching this buffer
-         * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
-         * some point in the future we need to invalidate l2. e.g. switching
-         * from a render buffer unmap (here) to later using the same memory
-         * for gmmu ptes. note the positioning of this relative to any smmu
-         * unmapping (below). */
-
-        if (!batch) {
-                gk20a_mm_l2_flush(g, true);
-                g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
-        } else {
-                if (!batch->gpu_l2_flushed) {
-                        gk20a_mm_l2_flush(g, true);
-                        batch->gpu_l2_flushed = true;
-                }
-                batch->need_tlb_invalidate = true;
-        }
-}
-
 enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
                                           struct dma_buf *dmabuf)
 {
@@ -2036,254 +1779,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
         return 0;
 }
 
-static int update_gmmu_level_locked(struct vm_gk20a *vm,
-                                    struct gk20a_mm_entry *pte,
-                                    enum gmmu_pgsz_gk20a pgsz_idx,
-                                    struct scatterlist **sgl,
-                                    u64 *offset,
-                                    u64 *iova,
-                                    u64 gpu_va, u64 gpu_end,
-                                    u8 kind_v, u64 *ctag,
-                                    bool cacheable, bool unmapped_pte,
-                                    int rw_flag,
-                                    bool sparse,
-                                    int lvl,
-                                    bool priv,
-                                    enum nvgpu_aperture aperture)
-{
-        struct gk20a *g = gk20a_from_vm(vm);
-        const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
-        const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
-        int err = 0;
-        u32 pde_i;
-        u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
-        struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL;
-
-        gk20a_dbg_fn("");
-
-        pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL))
-                >> (u64)l->lo_bit[pgsz_idx];
-
-        gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
-                  pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
-
-        while (gpu_va < gpu_end) {
-                u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
-
-                /* Allocate next level */
-                if (next_l->update_entry) {
-                        if (!pte->entries) {
-                                int num_entries =
-                                        1 <<
-                                        (l->hi_bit[pgsz_idx]
-                                         - l->lo_bit[pgsz_idx] + 1);
-                                pte->entries =
-                                        nvgpu_vzalloc(g,
-                                                sizeof(struct gk20a_mm_entry) *
-                                                num_entries);
-                                if (!pte->entries)
-                                        return -ENOMEM;
-                                pte->pgsz = pgsz_idx;
-                                pte->num_entries = num_entries;
-                        }
-                        prev_pte = next_pte;
-                        next_pte = pte->entries + pde_i;
-
-                        if (!next_pte->mem.size) {
-                                err = nvgpu_zalloc_gmmu_page_table(vm,
-                                        pgsz_idx, next_l, next_pte, prev_pte);
-                                if (err)
-                                        return err;
-                        }
-                }
-
-                err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-                                      sgl, offset, iova,
-                                      kind_v, ctag, cacheable, unmapped_pte,
-                                      rw_flag, sparse, priv, aperture);
-                if (err)
-                        return err;
-
-                if (next_l->update_entry) {
-                        /* get cpu access to the ptes */
-                        err = map_gmmu_pages(g, next_pte);
-                        if (err) {
-                                nvgpu_err(g,
-                                          "couldn't map ptes for update as=%d",
-                                          vm_aspace_id(vm));
-                                return err;
-                        }
-                        err = update_gmmu_level_locked(vm, next_pte,
-                                                       pgsz_idx,
-                                                       sgl,
-                                                       offset,
-                                                       iova,
-                                                       gpu_va,
-                                                       next,
-                                                       kind_v, ctag, cacheable, unmapped_pte,
-                                                       rw_flag, sparse, lvl+1, priv, aperture);
-                        unmap_gmmu_pages(g, next_pte);
-
-                        if (err)
-                                return err;
-                }
-
-                pde_i++;
-                gpu_va = next;
-        }
-
-        gk20a_dbg_fn("done");
-
-        return 0;
-}
-
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-                                   enum gmmu_pgsz_gk20a pgsz_idx,
-                                   struct sg_table *sgt,
-                                   u64 buffer_offset,
-                                   u64 gpu_va, u64 gpu_end,
-                                   u8 kind_v, u32 ctag_offset,
-                                   bool cacheable, bool unmapped_pte,
-                                   int rw_flag,
-                                   bool sparse,
-                                   bool priv,
-                                   enum nvgpu_aperture aperture)
-{
-        struct gk20a *g = gk20a_from_vm(vm);
-        int ctag_granularity = g->ops.fb.compression_page_size(g);
-        u64 ctag = (u64)ctag_offset * (u64)ctag_granularity;
-        u64 iova = 0;
-        u64 space_to_skip = buffer_offset;
-        u64 map_size = gpu_end - gpu_va;
-        u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-        int err;
-        struct scatterlist *sgl = NULL;
-        struct nvgpu_page_alloc *alloc = NULL;
-        struct page_alloc_chunk *chunk = NULL;
-        u64 length;
-
-        /* note: here we need to map kernel to small, since the
-         * low-level mmu code assumes 0 is small and 1 is big pages */
-        if (pgsz_idx == gmmu_page_size_kernel)
-                pgsz_idx = gmmu_page_size_small;
-
-        if (space_to_skip & (page_size - 1))
-                return -EINVAL;
-
-        err = map_gmmu_pages(g, &vm->pdb);
-        if (err) {
-                nvgpu_err(g,
-                          "couldn't map ptes for update as=%d",
-                          vm_aspace_id(vm));
-                return err;
-        }
-
-        if (aperture == APERTURE_VIDMEM) {
-                gk20a_dbg(gpu_dbg_map_v, "vidmem map size_idx=%d, gpu_va=[%llx,%llx], alloc=%llx",
-                          pgsz_idx, gpu_va, gpu_end-1, iova);
-
-                if (sgt) {
-                        alloc = get_vidmem_page_alloc(sgt->sgl);
-
-                        nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-                                                  page_alloc_chunk, list_entry) {
-                                if (space_to_skip &&
-                                    space_to_skip > chunk->length) {
-                                        space_to_skip -= chunk->length;
-                                } else {
-                                        iova = chunk->base + space_to_skip;
-                                        length = chunk->length - space_to_skip;
-                                        length = min(length, map_size);
-                                        space_to_skip = 0;
-
-                                        err = update_gmmu_level_locked(vm,
-                                                &vm->pdb, pgsz_idx,
-                                                &sgl,
-                                                &space_to_skip,
-                                                &iova,
-                                                gpu_va, gpu_va + length,
-                                                kind_v, &ctag,
-                                                cacheable, unmapped_pte,
-                                                rw_flag, sparse, 0, priv,
-                                                aperture);
-                                        if (err)
-                                                break;
-
-                                        /* need to set explicit zero here */
-                                        space_to_skip = 0;
-                                        gpu_va += length;
-                                        map_size -= length;
-
-                                        if (!map_size)
-                                                break;
-                                }
-                        }
-                } else {
-                        err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-                                        &sgl,
-                                        &space_to_skip,
-                                        &iova,
-                                        gpu_va, gpu_end,
-                                        kind_v, &ctag,
-                                        cacheable, unmapped_pte, rw_flag,
-                                        sparse, 0, priv,
-                                        aperture);
-                }
-        } else {
-                gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
-                          pgsz_idx,
-                          sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0)
-                              : 0ULL,
-                          buffer_offset,
-                          sgt ? sgt->nents : 0);
-
-                gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
-                          pgsz_idx, gpu_va, gpu_end-1, iova);
-
-                if (sgt) {
-                        iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
-                        if (!vm->mm->bypass_smmu && iova) {
-                                iova += space_to_skip;
-                        } else {
-                                sgl = sgt->sgl;
-
-                                gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-                                          (u64)sg_phys(sgl),
-                                          sgl->length);
-
-                                while (space_to_skip && sgl &&
-                                       space_to_skip + page_size > sgl->length) {
-                                        space_to_skip -= sgl->length;
-                                        sgl = sg_next(sgl);
-                                        gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-                                                  (u64)sg_phys(sgl),
-                                                  sgl->length);
-                                }
-
-                                iova = sg_phys(sgl) + space_to_skip;
-                        }
-                }
-
-                err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-                                &sgl,
-                                &space_to_skip,
-                                &iova,
-                                gpu_va, gpu_end,
-                                kind_v, &ctag,
-                                cacheable, unmapped_pte, rw_flag,
-                                sparse, 0, priv,
-                                aperture);
-        }
-
-        unmap_gmmu_pages(g, &vm->pdb);
-
-        smp_mb();
-
-        gk20a_dbg_fn("done");
-
-        return err;
-}
-
 /* NOTE! mapped_buffers lock must be held */
 void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
                            struct vm_gk20a_mapping_batch *batch)
@@ -2341,22 +1836,6 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
         return;
 }
 
-void gk20a_vm_free_entries(struct vm_gk20a *vm,
-                           struct gk20a_mm_entry *parent,
-                           int level)
-{
-        int i;
-
-        if (parent->entries)
-                for (i = 0; i < parent->num_entries; i++)
-                        gk20a_vm_free_entries(vm, &parent->entries[i], level+1);
-
-        if (parent->mem.size)
-                free_gmmu_pages(vm, parent);
-        nvgpu_vfree(vm->mm->g, parent->entries);
-        parent->entries = NULL;
-}
-
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
         {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
          .lo_bit = {26, 26},