diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 309 |
1 files changed, 145 insertions, 164 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index d7391c6d..c3867e9d 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -14,6 +14,7 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <nvgpu/dma.h> | 16 | #include <nvgpu/dma.h> |
17 | #include <nvgpu/gmmu.h> | ||
17 | 18 | ||
18 | #include "gk20a/gk20a.h" | 19 | #include "gk20a/gk20a.h" |
19 | #include "gk20a/platform_gk20a.h" | 20 | #include "gk20a/platform_gk20a.h" |
@@ -149,206 +150,186 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl, | |||
149 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); | 150 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); |
150 | } | 151 | } |
151 | 152 | ||
152 | static u32 pde3_from_index(u32 i) | 153 | static void update_gmmu_pde3_locked(struct vm_gk20a *vm, |
153 | { | 154 | const struct gk20a_mmu_level *l, |
154 | return i * gmmu_new_pde__size_v() / sizeof(u32); | 155 | struct nvgpu_gmmu_pd *pd, |
155 | } | 156 | u32 pd_idx, |
156 | 157 | u64 virt_addr, | |
157 | static u32 pte3_from_index(u32 i) | 158 | u64 phys_addr, |
158 | { | 159 | struct nvgpu_gmmu_attrs *attrs) |
159 | return i * gmmu_new_pte__size_v() / sizeof(u32); | ||
160 | } | ||
161 | |||
162 | static int update_gmmu_pde3_locked(struct vm_gk20a *vm, | ||
163 | struct gk20a_mm_entry *parent, | ||
164 | u32 i, u32 gmmu_pgsz_idx, | ||
165 | struct scatterlist **sgl, | ||
166 | u64 *offset, | ||
167 | u64 *iova, | ||
168 | u32 kind_v, u64 *ctag, | ||
169 | bool cacheable, bool unmapped_pte, | ||
170 | int rw_flag, bool sparse, bool priv, | ||
171 | enum nvgpu_aperture aperture) | ||
172 | { | 160 | { |
173 | struct gk20a *g = gk20a_from_vm(vm); | 161 | struct gk20a *g = gk20a_from_vm(vm); |
174 | u64 pte_addr = 0; | 162 | u32 pd_offset = pd_offset_from_index(l, pd_idx); |
175 | struct gk20a_mm_entry *pte = parent->entries + i; | ||
176 | u32 pde_v[2] = {0, 0}; | 163 | u32 pde_v[2] = {0, 0}; |
177 | u32 pde; | ||
178 | |||
179 | gk20a_dbg_fn(""); | ||
180 | 164 | ||
181 | pte_addr = gk20a_pde_addr(g, pte) >> gmmu_new_pde_address_shift_v(); | 165 | phys_addr >>= gmmu_new_pde_address_shift_v(); |
182 | 166 | ||
183 | pde_v[0] |= nvgpu_aperture_mask(g, &pte->mem, | 167 | pde_v[0] |= nvgpu_aperture_mask(g, &pd->mem, |
184 | gmmu_new_pde_aperture_sys_mem_ncoh_f(), | 168 | gmmu_new_pde_aperture_sys_mem_ncoh_f(), |
185 | gmmu_new_pde_aperture_video_memory_f()); | 169 | gmmu_new_pde_aperture_video_memory_f()); |
186 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr)); | 170 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); |
187 | pde_v[0] |= gmmu_new_pde_vol_true_f(); | 171 | pde_v[0] |= gmmu_new_pde_vol_true_f(); |
188 | pde_v[1] |= pte_addr >> 24; | 172 | pde_v[1] |= phys_addr >> 24; |
189 | pde = pde3_from_index(i); | 173 | |
190 | 174 | pd_write(g, pd, pd_offset + 0, pde_v[0]); | |
191 | gk20a_pde_wr32(g, parent, pde + 0, pde_v[0]); | 175 | pd_write(g, pd, pd_offset + 1, pde_v[1]); |
192 | gk20a_pde_wr32(g, parent, pde + 1, pde_v[1]); | 176 | |
193 | 177 | pte_dbg(g, attrs, | |
194 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", | 178 | "PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | " |
195 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); | 179 | "GPU %#-12llx phys %#-12llx " |
196 | gk20a_dbg_fn("done"); | 180 | "[0x%08x, 0x%08x]", |
197 | return 0; | 181 | pd_idx, l->entry_size, pd_offset, |
182 | virt_addr, phys_addr, | ||
183 | pde_v[1], pde_v[0]); | ||
198 | } | 184 | } |
199 | 185 | ||
200 | static u32 pde0_from_index(u32 i) | 186 | static void update_gmmu_pde0_locked(struct vm_gk20a *vm, |
201 | { | 187 | const struct gk20a_mmu_level *l, |
202 | return i * gmmu_new_dual_pde__size_v() / sizeof(u32); | 188 | struct nvgpu_gmmu_pd *pd, |
203 | } | 189 | u32 pd_idx, |
204 | 190 | u64 virt_addr, | |
205 | static int update_gmmu_pde0_locked(struct vm_gk20a *vm, | 191 | u64 phys_addr, |
206 | struct gk20a_mm_entry *pte, | 192 | struct nvgpu_gmmu_attrs *attrs) |
207 | u32 i, u32 gmmu_pgsz_idx, | ||
208 | struct scatterlist **sgl, | ||
209 | u64 *offset, | ||
210 | u64 *iova, | ||
211 | u32 kind_v, u64 *ctag, | ||
212 | bool cacheable, bool unmapped_pte, | ||
213 | int rw_flag, bool sparse, bool priv, | ||
214 | enum nvgpu_aperture aperture) | ||
215 | { | 193 | { |
216 | struct gk20a *g = gk20a_from_vm(vm); | 194 | struct gk20a *g = gk20a_from_vm(vm); |
217 | bool small_valid, big_valid; | 195 | bool small_valid, big_valid; |
218 | u32 pte_addr_small = 0, pte_addr_big = 0; | 196 | u32 small_addr = 0, big_addr = 0; |
219 | struct gk20a_mm_entry *entry = pte->entries + i; | 197 | u32 pd_offset = pd_offset_from_index(l, pd_idx); |
220 | u32 pde_v[4] = {0, 0, 0, 0}; | 198 | u32 pde_v[4] = {0, 0, 0, 0}; |
221 | u32 pde; | ||
222 | |||
223 | gk20a_dbg_fn(""); | ||
224 | 199 | ||
225 | small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small; | 200 | small_valid = attrs->pgsz == gmmu_page_size_small; |
226 | big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big; | 201 | big_valid = attrs->pgsz == gmmu_page_size_big; |
227 | 202 | ||
228 | if (small_valid) { | 203 | if (small_valid) |
229 | pte_addr_small = gk20a_pde_addr(g, entry) | 204 | small_addr = phys_addr >> gmmu_new_dual_pde_address_shift_v(); |
230 | >> gmmu_new_dual_pde_address_shift_v(); | ||
231 | } | ||
232 | 205 | ||
233 | if (big_valid) | 206 | if (big_valid) |
234 | pte_addr_big = gk20a_pde_addr(g, entry) | 207 | big_addr = phys_addr >> gmmu_new_dual_pde_address_big_shift_v(); |
235 | >> gmmu_new_dual_pde_address_big_shift_v(); | ||
236 | 208 | ||
237 | if (small_valid) { | 209 | if (small_valid) { |
238 | pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small); | 210 | pde_v[2] |= |
239 | pde_v[2] |= nvgpu_aperture_mask(g, &entry->mem, | 211 | gmmu_new_dual_pde_address_small_sys_f(small_addr); |
212 | pde_v[2] |= nvgpu_aperture_mask(g, &pd->mem, | ||
240 | gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), | 213 | gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), |
241 | gmmu_new_dual_pde_aperture_small_video_memory_f()); | 214 | gmmu_new_dual_pde_aperture_small_video_memory_f()); |
242 | pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); | 215 | pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); |
243 | pde_v[3] |= pte_addr_small >> 24; | 216 | pde_v[3] |= small_addr >> 24; |
244 | } | 217 | } |
245 | 218 | ||
246 | if (big_valid) { | 219 | if (big_valid) { |
247 | pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big); | 220 | pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(big_addr); |
248 | pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); | 221 | pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); |
249 | pde_v[0] |= nvgpu_aperture_mask(g, &entry->mem, | 222 | pde_v[0] |= nvgpu_aperture_mask(g, &pd->mem, |
250 | gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), | 223 | gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), |
251 | gmmu_new_dual_pde_aperture_big_video_memory_f()); | 224 | gmmu_new_dual_pde_aperture_big_video_memory_f()); |
252 | pde_v[1] |= pte_addr_big >> 28; | 225 | pde_v[1] |= big_addr >> 28; |
253 | } | 226 | } |
254 | 227 | ||
255 | pde = pde0_from_index(i); | 228 | pd_write(g, pd, pd_offset + 0, pde_v[0]); |
256 | 229 | pd_write(g, pd, pd_offset + 1, pde_v[1]); | |
257 | gk20a_pde_wr32(g, pte, pde + 0, pde_v[0]); | 230 | pd_write(g, pd, pd_offset + 2, pde_v[2]); |
258 | gk20a_pde_wr32(g, pte, pde + 1, pde_v[1]); | 231 | pd_write(g, pd, pd_offset + 3, pde_v[3]); |
259 | gk20a_pde_wr32(g, pte, pde + 2, pde_v[2]); | 232 | |
260 | gk20a_pde_wr32(g, pte, pde + 3, pde_v[3]); | 233 | pte_dbg(g, attrs, |
261 | 234 | "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | " | |
262 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]", | 235 | "GPU %#-12llx phys %#-12llx " |
263 | i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]); | 236 | "[0x%08x, 0x%08x, 0x%08x, 0x%08x]", |
264 | gk20a_dbg_fn("done"); | 237 | pd_idx, l->entry_size, pd_offset, |
265 | return 0; | 238 | small_valid ? 'S' : '-', |
239 | big_valid ? 'B' : '-', | ||
240 | virt_addr, phys_addr, | ||
241 | pde_v[3], pde_v[2], pde_v[1], pde_v[0]); | ||
266 | } | 242 | } |
267 | 243 | ||
268 | static int update_gmmu_pte_locked(struct vm_gk20a *vm, | 244 | static void __update_pte(struct vm_gk20a *vm, |
269 | struct gk20a_mm_entry *pte, | 245 | u32 *pte_w, |
270 | u32 i, u32 gmmu_pgsz_idx, | 246 | u64 phys_addr, |
271 | struct scatterlist **sgl, | 247 | struct nvgpu_gmmu_attrs *attrs) |
272 | u64 *offset, | ||
273 | u64 *iova, | ||
274 | u32 kind_v, u64 *ctag, | ||
275 | bool cacheable, bool unmapped_pte, | ||
276 | int rw_flag, bool sparse, bool priv, | ||
277 | enum nvgpu_aperture aperture) | ||
278 | { | 248 | { |
279 | struct gk20a *g = vm->mm->g; | 249 | struct gk20a *g = gk20a_from_vm(vm); |
280 | u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; | ||
281 | u64 ctag_granularity = g->ops.fb.compression_page_size(g); | 250 | u64 ctag_granularity = g->ops.fb.compression_page_size(g); |
282 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | 251 | u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; |
283 | u32 pte_i; | 252 | u32 pte_valid = attrs->valid ? |
284 | 253 | gmmu_new_pte_valid_true_f() : | |
285 | if (*iova) { | 254 | gmmu_new_pte_valid_false_f(); |
286 | u32 pte_valid = unmapped_pte ? | 255 | u32 phys_shifted = phys_addr >> gmmu_new_pte_address_shift_v(); |
287 | gmmu_new_pte_valid_false_f() : | 256 | u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ? |
288 | gmmu_new_pte_valid_true_f(); | 257 | gmmu_new_pte_address_sys_f(phys_shifted) : |
289 | u32 iova_v = *iova >> gmmu_new_pte_address_shift_v(); | 258 | gmmu_new_pte_address_vid_f(phys_shifted); |
290 | u32 pte_addr = aperture == APERTURE_SYSMEM ? | 259 | u32 pte_tgt = __nvgpu_aperture_mask(g, attrs->aperture, |
291 | gmmu_new_pte_address_sys_f(iova_v) : | 260 | gmmu_new_pte_aperture_sys_mem_ncoh_f(), |
292 | gmmu_new_pte_address_vid_f(iova_v); | 261 | gmmu_new_pte_aperture_video_memory_f()); |
293 | u32 pte_tgt = __nvgpu_aperture_mask(g, aperture, | 262 | |
294 | gmmu_new_pte_aperture_sys_mem_ncoh_f(), | 263 | pte_w[0] = pte_valid | pte_addr | pte_tgt; |
295 | gmmu_new_pte_aperture_video_memory_f()); | 264 | |
296 | 265 | if (attrs->priv) | |
297 | pte_w[0] = pte_valid | pte_addr | pte_tgt; | 266 | pte_w[0] |= gmmu_new_pte_privilege_true_f(); |
298 | 267 | ||
299 | if (priv) | 268 | pte_w[1] = phys_addr >> (24 + gmmu_new_pte_address_shift_v()) | |
300 | pte_w[0] |= gmmu_new_pte_privilege_true_f(); | 269 | gmmu_new_pte_kind_f(attrs->kind_v) | |
301 | 270 | gmmu_new_pte_comptagline_f((u32)(attrs->ctag / | |
302 | pte_w[1] = *iova >> (24 + gmmu_new_pte_address_shift_v()) | | 271 | ctag_granularity)); |
303 | gmmu_new_pte_kind_f(kind_v) | | 272 | |
304 | gmmu_new_pte_comptagline_f((u32)(*ctag / ctag_granularity)); | 273 | if (attrs->rw_flag == gk20a_mem_flag_read_only) |
305 | 274 | pte_w[0] |= gmmu_new_pte_read_only_true_f(); | |
306 | if (rw_flag == gk20a_mem_flag_read_only) | 275 | |
307 | pte_w[0] |= gmmu_new_pte_read_only_true_f(); | 276 | if (!attrs->valid && !attrs->cacheable) |
308 | if (unmapped_pte && !cacheable) | 277 | pte_w[0] |= gmmu_new_pte_read_only_true_f(); |
309 | pte_w[0] |= gmmu_new_pte_read_only_true_f(); | 278 | else if (!attrs->cacheable) |
310 | else if (!cacheable) | ||
311 | pte_w[0] |= gmmu_new_pte_vol_true_f(); | ||
312 | |||
313 | gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d" | ||
314 | " ctag=%d vol=%d" | ||
315 | " [0x%08x, 0x%08x]", | ||
316 | i, *iova, | ||
317 | kind_v, (u32)(*ctag / ctag_granularity), !cacheable, | ||
318 | pte_w[1], pte_w[0]); | ||
319 | |||
320 | if (*ctag) | ||
321 | *ctag += page_size; | ||
322 | } else if (sparse) { | ||
323 | pte_w[0] = gmmu_new_pte_valid_false_f(); | ||
324 | pte_w[0] |= gmmu_new_pte_vol_true_f(); | 279 | pte_w[0] |= gmmu_new_pte_vol_true_f(); |
325 | } else { | ||
326 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | ||
327 | } | ||
328 | 280 | ||
329 | pte_i = pte3_from_index(i); | 281 | if (attrs->ctag) |
330 | 282 | attrs->ctag += page_size; | |
331 | gk20a_pde_wr32(g, pte, pte_i + 0, pte_w[0]); | 283 | |
332 | gk20a_pde_wr32(g, pte, pte_i + 1, pte_w[1]); | 284 | } |
333 | 285 | ||
334 | if (*iova) { | 286 | static void __update_pte_sparse(u32 *pte_w) |
335 | *iova += page_size; | 287 | { |
336 | *offset += page_size; | 288 | pte_w[0] = gmmu_new_pte_valid_false_f(); |
337 | if (*sgl && *offset + page_size > (*sgl)->length) { | 289 | pte_w[0] |= gmmu_new_pte_vol_true_f(); |
338 | u64 new_iova; | 290 | } |
339 | *sgl = sg_next(*sgl); | 291 | |
340 | if (*sgl) { | 292 | static void update_gmmu_pte_locked(struct vm_gk20a *vm, |
341 | new_iova = sg_phys(*sgl); | 293 | const struct gk20a_mmu_level *l, |
342 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | 294 | struct nvgpu_gmmu_pd *pd, |
343 | new_iova, (*sgl)->length); | 295 | u32 pd_idx, |
344 | if (new_iova) { | 296 | u64 virt_addr, |
345 | *offset = 0; | 297 | u64 phys_addr, |
346 | *iova = new_iova; | 298 | struct nvgpu_gmmu_attrs *attrs) |
347 | } | 299 | { |
348 | } | 300 | struct gk20a *g = vm->mm->g; |
349 | } | 301 | u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; |
350 | } | 302 | u32 pd_offset = pd_offset_from_index(l, pd_idx); |
351 | return 0; | 303 | u32 pte_w[2] = {0, 0}; |
304 | |||
305 | if (phys_addr) | ||
306 | __update_pte(vm, pte_w, phys_addr, attrs); | ||
307 | else if (attrs->sparse) | ||
308 | __update_pte_sparse(pte_w); | ||
309 | |||
310 | pte_dbg(g, attrs, | ||
311 | "vm=%s " | ||
312 | "PTE: i=%-4u size=%-2u offs=%-4u | " | ||
313 | "GPU %#-12llx phys %#-12llx " | ||
314 | "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c " | ||
315 | "ctag=0x%08x " | ||
316 | "[0x%08x, 0x%08x]", | ||
317 | vm->name, | ||
318 | pd_idx, l->entry_size, pd_offset, | ||
319 | virt_addr, phys_addr, | ||
320 | page_size >> 10, | ||
321 | nvgpu_gmmu_perm_str(attrs->rw_flag), | ||
322 | attrs->kind_v, | ||
323 | nvgpu_aperture_str(attrs->aperture), | ||
324 | attrs->valid ? 'V' : '-', | ||
325 | attrs->cacheable ? 'C' : '-', | ||
326 | attrs->sparse ? 'S' : '-', | ||
327 | attrs->priv ? 'P' : '-', | ||
328 | (u32)attrs->ctag / g->ops.fb.compression_page_size(g), | ||
329 | pte_w[1], pte_w[0]); | ||
330 | |||
331 | pd_write(g, pd, pd_offset + 0, pte_w[0]); | ||
332 | pd_write(g, pd, pd_offset + 1, pte_w[1]); | ||
352 | } | 333 | } |
353 | 334 | ||
354 | static const struct gk20a_mmu_level gp10b_mm_levels[] = { | 335 | static const struct gk20a_mmu_level gp10b_mm_levels[] = { |
@@ -384,7 +365,7 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, | |||
384 | static void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, | 365 | static void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, |
385 | struct vm_gk20a *vm) | 366 | struct vm_gk20a *vm) |
386 | { | 367 | { |
387 | u64 pdb_addr = gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0); | 368 | u64 pdb_addr = nvgpu_mem_get_base_addr(g, &vm->pdb.mem, 0); |
388 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 369 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
389 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | 370 | u32 pdb_addr_hi = u64_hi32(pdb_addr); |
390 | 371 | ||