Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  98
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h   5
2 files changed, 80 insertions, 23 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 4c55f8ce..f327294a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1252,6 +1252,9 @@ void free_gmmu_pages(struct vm_gk20a *vm,
 	if (!entry->mem.size)
 		return;
 
+	if (entry->woffset) /* fake shadow mem */
+		return;
+
 	if (platform->is_fmodel) {
 		free_gmmu_phys_pages(vm, entry);
 		return;
@@ -1317,35 +1320,64 @@ void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 	}
 }
 
-/* allocate a phys contig region big enough for a full
+/*
+ * Allocate a phys contig region big enough for a full
  * sized gmmu page table for the given gmmu_page_size.
- * the whole range is zeroed so it's "invalid"/will fault
+ * the whole range is zeroed so it's "invalid"/will fault.
+ *
+ * If a previous entry is supplied, its memory will be used for
+ * suballocation for this next entry too, if there is space.
  */
 
 static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm,
 		enum gmmu_pgsz_gk20a pgsz_idx,
 		const struct gk20a_mmu_level *l,
-		struct gk20a_mm_entry *entry)
+		struct gk20a_mm_entry *entry,
+		struct gk20a_mm_entry *prev_entry)
 {
-	int err;
+	int err = -ENOMEM;
 	int order;
 	struct gk20a *g = gk20a_from_vm(vm);
+	u32 bytes;
 
 	gk20a_dbg_fn("");
 
 	/* allocate enough pages for the table */
 	order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
 	order += ilog2(l->entry_size);
+	bytes = 1 << order;
 	order -= PAGE_SHIFT;
-	order = max(0, order);
+	if (order < 0 && prev_entry) {
+		/* try to suballocate from previous chunk */
+		u32 capacity = prev_entry->mem.size / bytes;
+		u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
+		u32 free = capacity - prev - 1;
+
+		gk20a_dbg(gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
+				capacity, prev, free, bytes);
+
+		if (free) {
+			memcpy(&entry->mem, &prev_entry->mem,
+					sizeof(entry->mem));
+			entry->woffset = prev_entry->woffset
+				+ bytes / sizeof(u32);
+			err = 0;
+		}
+	}
+
+	if (err) {
+		/* no suballoc space */
+		order = max(0, order);
+		err = alloc_gmmu_pages(vm, order, entry);
+		entry->woffset = 0;
+	}
 
-	err = alloc_gmmu_pages(vm, order, entry);
-	gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d",
+	gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
 		entry,
 		(entry->mem.sgt && entry->mem.aperture == APERTURE_SYSMEM) ?
 		g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0)
 		: 0,
-		order);
+		order, entry->woffset);
 	if (err)
 		return err;
 	entry->pgsz = pgsz_idx;
@@ -3476,13 +3508,31 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
+void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry,
+		size_t w, size_t data)
+{
+	gk20a_mem_wr32(g, &entry->mem, entry->woffset + w, data);
+}
+
+u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
+{
+	u64 base;
+
+	if (g->mm.has_physical_mode)
+		base = sg_phys(entry->mem.sgt->sgl);
+	else
+		base = gk20a_mem_get_base_addr(g, &entry->mem, 0);
+
+	return base + entry->woffset * sizeof(u32);
+}
+
 /* for gk20a the "video memory" apertures here are misnomers. */
 static inline u32 big_valid_pde0_bits(struct gk20a *g,
-		struct mem_desc *entry_mem)
+		struct gk20a_mm_entry *entry)
 {
-	u64 pte_addr = gk20a_mem_get_base_addr(g, entry_mem, 0);
+	u64 pte_addr = gk20a_pde_addr(g, entry);
 	u32 pde0_bits =
-		gk20a_aperture_mask(g, entry_mem,
+		gk20a_aperture_mask(g, &entry->mem,
 			gmmu_pde_aperture_big_sys_mem_ncoh_f(),
 			gmmu_pde_aperture_big_video_memory_f()) |
 		gmmu_pde_address_big_sys_f(
@@ -3492,11 +3542,11 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
 }
 
 static inline u32 small_valid_pde1_bits(struct gk20a *g,
-		struct mem_desc *entry_mem)
+		struct gk20a_mm_entry *entry)
 {
-	u64 pte_addr = gk20a_mem_get_base_addr(g, entry_mem, 0);
+	u64 pte_addr = gk20a_pde_addr(g, entry);
 	u32 pde1_bits =
-		gk20a_aperture_mask(g, entry_mem,
+		gk20a_aperture_mask(g, &entry->mem,
 			gmmu_pde_aperture_small_sys_mem_ncoh_f(),
 			gmmu_pde_aperture_small_video_memory_f()) |
 		gmmu_pde_vol_small_true_f() | /* tbd: why? */
@@ -3536,11 +3586,11 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 
 	pde_v[0] = gmmu_pde_size_full_f();
 	pde_v[0] |= big_valid ?
-		big_valid_pde0_bits(g, &entry->mem) :
+		big_valid_pde0_bits(g, entry) :
 		gmmu_pde_aperture_big_invalid_f();
 
 	pde_v[1] |= (small_valid ?
-		small_valid_pde1_bits(g, &entry->mem) :
+		small_valid_pde1_bits(g, entry) :
 		(gmmu_pde_aperture_small_invalid_f() |
 		gmmu_pde_vol_small_false_f()))
 		|
@@ -3549,8 +3599,8 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 
 	pde = pde_from_index(i);
 
-	gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]);
-	gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]);
+	gk20a_pde_wr32(g, &vm->pdb, pde + 0, pde_v[0]);
+	gk20a_pde_wr32(g, &vm->pdb, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -3633,8 +3683,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 0, pte_w[0]);
-	gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 1, pte_w[1]);
+	gk20a_pde_wr32(g, pte, pte_from_index(i) + 0, pte_w[0]);
+	gk20a_pde_wr32(g, pte, pte_from_index(i) + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
@@ -3678,6 +3728,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 	int err = 0;
 	u32 pde_i;
 	u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
+	struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL;
 
 	gk20a_dbg_fn("");
 
@@ -3688,7 +3739,6 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
 
 	while (gpu_va < gpu_end) {
-		struct gk20a_mm_entry *next_pte = NULL;
 		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
 
 		/* Allocate next level */
@@ -3706,11 +3756,12 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			pte->pgsz = pgsz_idx;
 			pte->num_entries = num_entries;
 		}
+		prev_pte = next_pte;
 		next_pte = pte->entries + pde_i;
 
 		if (!next_pte->mem.size) {
 			err = gk20a_zalloc_gmmu_page_table(vm,
-				pgsz_idx, next_l, next_pte);
+				pgsz_idx, next_l, next_pte, prev_pte);
 			if (err)
 				return err;
 		}
@@ -4203,7 +4254,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 			name, vm->va_limit, pde_hi + 1);
 
 	/* allocate the page table directory */
-	err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0], &vm->pdb);
+	err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0],
+			&vm->pdb, NULL);
 	if (err)
 		goto clean_up_pdes;
 
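
The mm_gk20a.c changes above only take the suballocation path in gk20a_zalloc_gmmu_page_table when the computed table size is smaller than one page, i.e. when order goes negative after subtracting PAGE_SHIFT. In that case the new entry reuses the previous sibling's backing allocation and records its slot as woffset, counted in 32-bit words; otherwise a fresh page allocation is made with woffset = 0. The following standalone sketch walks the same arithmetic with assumed example numbers (8 index bits, 8-byte entries, 4 KB pages); these values are illustrative and not taken from the patch.

    #include <stdio.h>
    #include <stdint.h>

    /* Assumed example geometry; the real values come from gk20a_mmu_level. */
    #define INDEX_BITS 8    /* hi_bit - lo_bit + 1 */
    #define ENTRY_SIZE 8    /* bytes per table entry (two u32 words) */
    #define PAGE_SHIFT 12   /* 4 KB backing pages */

    int main(void)
    {
        unsigned bytes = (1u << INDEX_BITS) * ENTRY_SIZE;  /* 2048: table size */
        unsigned chunk = 1u << PAGE_SHIFT;                 /* 4096: one backing page */

        /* How many sub-page tables fit in one backing allocation. */
        unsigned capacity = chunk / bytes;                 /* 2 */

        /* The first table sits at woffset 0; the next sibling can reuse the
         * same page only if a slot remains after it. */
        unsigned prev_woffset = 0;
        unsigned prev_slot = prev_woffset * sizeof(uint32_t) / bytes;
        unsigned free_slots = capacity - prev_slot - 1;    /* 1 */

        if (free_slots) {
            unsigned woffset = prev_woffset + bytes / sizeof(uint32_t);
            printf("second table suballocated at word offset %u (byte %zu)\n",
                   woffset, woffset * sizeof(uint32_t));
        }
        return 0;
    }

With these assumed values a 2 KB table half-fills a 4 KB page, so exactly one sibling can be suballocated at word offset 512 before a fresh page is needed.
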
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index ee2bb61e..fe10b046 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -183,6 +183,7 @@ struct gk20a_comptags {
 struct gk20a_mm_entry {
 	/* backing for */
 	struct mem_desc mem;
+	u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */
 	int pgsz;
 	struct gk20a_mm_entry *entries;
 	int num_entries;
@@ -631,6 +632,10 @@ u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
 u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem,
 		u32 sysmem_mask, u32 vidmem_mask);
 
+void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry,
+		size_t w, size_t data);
+u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry);
+
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	u64 map_offset,
 	struct sg_table *sgt,
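
Once a table can live at a non-zero offset inside a shared page, every access has to fold that offset in: gk20a_pde_wr32 adds woffset to the word index before writing, gk20a_pde_addr adds woffset * sizeof(u32) to the backing memory's base address, and free_gmmu_pages returns early for shadow entries so only the owner (woffset == 0) releases the shared backing. Below is a minimal userspace sketch of those three rules, using simplified stand-in types (fake_mem, fake_entry) rather than the driver's mem_desc and gk20a_mm_entry.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified stand-ins for the driver structures (assumption). */
    struct fake_mem {
        uint32_t *cpu_va;    /* CPU mapping of the shared backing page */
        uint64_t  gpu_base;  /* GPU/bus address of that page */
    };

    struct fake_entry {
        struct fake_mem mem;
        uint32_t woffset;    /* 0 for the owner, >0 for a suballocated shadow */
    };

    /* Word writes are shifted by woffset, like gk20a_pde_wr32. */
    static void fake_pde_wr32(struct fake_entry *e, size_t w, uint32_t data)
    {
        e->mem.cpu_va[e->woffset + w] = data;
    }

    /* The GPU-visible table base is shifted by woffset * sizeof(u32),
     * like gk20a_pde_addr. */
    static uint64_t fake_pde_addr(struct fake_entry *e)
    {
        return e->mem.gpu_base + e->woffset * sizeof(uint32_t);
    }

    /* Only the owning entry releases the shared backing, mirroring the
     * early return on entry->woffset in free_gmmu_pages. */
    static void fake_free(struct fake_entry *e)
    {
        if (e->woffset)  /* shadow copy, owned by another entry */
            return;
        free(e->mem.cpu_va);
    }

    int main(void)
    {
        struct fake_entry owner = {
            { calloc(1024, sizeof(uint32_t)), 0x100000 }, 0
        };
        struct fake_entry shadow = { owner.mem, 512 };  /* second 2 KB table */

        fake_pde_wr32(&owner, 0, 0x1);   /* word 0 of the page */
        fake_pde_wr32(&shadow, 0, 0x2);  /* word 512 of the same page */
        printf("owner base 0x%llx, shadow base 0x%llx\n",
               (unsigned long long)fake_pde_addr(&owner),
               (unsigned long long)fake_pde_addr(&shadow));

        fake_free(&shadow);  /* no-op: backing is owned by 'owner' */
        fake_free(&owner);   /* actually frees the page */
        return 0;
    }

Continuing the earlier example, the shadow entry at woffset 512 reports a GPU base 2048 bytes past the owner's, and freeing it is a no-op.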