Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  98
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h   5
2 files changed, 80 insertions, 23 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 4c55f8ce..f327294a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1252,6 +1252,9 @@ void free_gmmu_pages(struct vm_gk20a *vm,
 	if (!entry->mem.size)
 		return;
 
+	if (entry->woffset) /* fake shadow mem */
+		return;
+
 	if (platform->is_fmodel) {
 		free_gmmu_phys_pages(vm, entry);
 		return;
@@ -1317,35 +1320,64 @@ void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 	}
 }
 
-/* allocate a phys contig region big enough for a full
+/*
+ * Allocate a phys contig region big enough for a full
  * sized gmmu page table for the given gmmu_page_size.
- * the whole range is zeroed so it's "invalid"/will fault
+ * the whole range is zeroed so it's "invalid"/will fault.
+ *
+ * If a previous entry is supplied, its memory will be used for
+ * suballocation for this next entry too, if there is space.
  */
 
 static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm,
 				 enum gmmu_pgsz_gk20a pgsz_idx,
 				 const struct gk20a_mmu_level *l,
-				 struct gk20a_mm_entry *entry)
+				 struct gk20a_mm_entry *entry,
+				 struct gk20a_mm_entry *prev_entry)
 {
-	int err;
+	int err = -ENOMEM;
 	int order;
 	struct gk20a *g = gk20a_from_vm(vm);
+	u32 bytes;
 
 	gk20a_dbg_fn("");
 
 	/* allocate enough pages for the table */
 	order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
 	order += ilog2(l->entry_size);
+	bytes = 1 << order;
 	order -= PAGE_SHIFT;
-	order = max(0, order);
+	if (order < 0 && prev_entry) {
+		/* try to suballocate from previous chunk */
+		u32 capacity = prev_entry->mem.size / bytes;
+		u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
+		u32 free = capacity - prev - 1;
+
+		gk20a_dbg(gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
+				capacity, prev, free, bytes);
+
+		if (free) {
+			memcpy(&entry->mem, &prev_entry->mem,
+					sizeof(entry->mem));
+			entry->woffset = prev_entry->woffset
+				+ bytes / sizeof(u32);
+			err = 0;
+		}
+	}
+
+	if (err) {
+		/* no suballoc space */
+		order = max(0, order);
+		err = alloc_gmmu_pages(vm, order, entry);
+		entry->woffset = 0;
+	}
 
-	err = alloc_gmmu_pages(vm, order, entry);
-	gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d",
+	gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
 		  entry,
 		  (entry->mem.sgt && entry->mem.aperture == APERTURE_SYSMEM) ?
 		  g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0)
 		  : 0,
-		  order);
+		  order, entry->woffset);
 	if (err)
 		return err;
 	entry->pgsz = pgsz_idx;
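For context, the suballocation arithmetic in the hunk above works like this: bytes = 1 << order is the size of one table before order is reduced by PAGE_SHIFT, capacity is how many such tables fit in the previous entry's backing memory, and woffset is kept in 32-bit words so it can be passed straight to the mem write path. The standalone sketch below just replays that arithmetic outside the driver; the 4 KiB page, the 256-byte table size, and struct fake_entry are illustrative assumptions, not driver values.

/* Standalone sketch of the woffset suballocation arithmetic (assumed sizes). */
#include <stdio.h>
#include <stdint.h>

struct fake_entry {
	uint32_t mem_size;	/* bytes of backing memory (stands in for mem.size) */
	uint32_t woffset;	/* offset into that memory, in 32-bit words */
};

int main(void)
{
	const uint32_t page = 4096;	/* assumed PAGE_SIZE */
	const uint32_t bytes = 256;	/* assumed table size: 1 << order, with order < PAGE_SHIFT */

	struct fake_entry prev = { .mem_size = page, .woffset = 0 };
	struct fake_entry next;

	/* Same computation as the patch: how many tables fit in the previous
	 * chunk, which slot the previous table occupies, and how many are free. */
	uint32_t capacity = prev.mem_size / bytes;			/* 16 */
	uint32_t prev_slot = prev.woffset * sizeof(uint32_t) / bytes;	/* 0 */
	uint32_t free_slots = capacity - prev_slot - 1;			/* 15 */

	if (free_slots) {
		/* Share the backing memory; advance by one table, counted in words. */
		next = prev;
		next.woffset = prev.woffset + bytes / sizeof(uint32_t);
	} else {
		/* Would fall back to a fresh allocation with woffset 0. */
		next.mem_size = page;
		next.woffset = 0;
	}

	printf("capacity=%u free=%u next.woffset=%u words (byte offset %zu)\n",
	       capacity, free_slots, next.woffset,
	       (size_t)next.woffset * sizeof(uint32_t));
	return 0;
}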
@@ -3476,13 +3508,31 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
+void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry,
+		size_t w, size_t data)
+{
+	gk20a_mem_wr32(g, &entry->mem, entry->woffset + w, data);
+}
+
+u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
+{
+	u64 base;
+
+	if (g->mm.has_physical_mode)
+		base = sg_phys(entry->mem.sgt->sgl);
+	else
+		base = gk20a_mem_get_base_addr(g, &entry->mem, 0);
+
+	return base + entry->woffset * sizeof(u32);
+}
+
 /* for gk20a the "video memory" apertures here are misnomers. */
 static inline u32 big_valid_pde0_bits(struct gk20a *g,
-		struct mem_desc *entry_mem)
+		struct gk20a_mm_entry *entry)
 {
-	u64 pte_addr = gk20a_mem_get_base_addr(g, entry_mem, 0);
+	u64 pte_addr = gk20a_pde_addr(g, entry);
 	u32 pde0_bits =
-		gk20a_aperture_mask(g, entry_mem,
+		gk20a_aperture_mask(g, &entry->mem,
 			gmmu_pde_aperture_big_sys_mem_ncoh_f(),
 			gmmu_pde_aperture_big_video_memory_f()) |
 		gmmu_pde_address_big_sys_f(
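A note on the two new helpers above: gk20a_pde_wr32() shifts every word index by entry->woffset so callers keep addressing their table from word 0, and gk20a_pde_addr() reports the base the GMMU should see, i.e. the backing allocation's address plus woffset * sizeof(u32) bytes. The check below is a minimal, self-contained illustration of that invariant; the WOFFSET value, the static backing array, and pde_wr32() are hypothetical stand-ins, not driver code.

/* Minimal illustration: a word written at logical index w through a
 * woffset-shifted writer lands at (base + woffset*4) + w*4, which is the
 * reported table address plus the word index. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define WOFFSET 64u		/* assumed shadow offset, in 32-bit words */

static uint32_t backing[1024];	/* stands in for the shared backing page */

static void pde_wr32(uint32_t woffset, size_t w, uint32_t data)
{
	backing[woffset + w] = data;	/* same shift gk20a_pde_wr32() applies */
}

int main(void)
{
	uintptr_t base = (uintptr_t)backing;
	uintptr_t table_addr = base + WOFFSET * sizeof(uint32_t);

	pde_wr32(WOFFSET, 3, 0xdeadbeef);

	/* The value is visible at the reported table address + word index. */
	assert(*(uint32_t *)(table_addr + 3 * sizeof(uint32_t)) == 0xdeadbeef);
	printf("table at %#lx, word 3 = %#x\n",
	       (unsigned long)table_addr, backing[WOFFSET + 3]);
	return 0;
}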
@@ -3492,11 +3542,11 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
 }
 
 static inline u32 small_valid_pde1_bits(struct gk20a *g,
-		struct mem_desc *entry_mem)
+		struct gk20a_mm_entry *entry)
 {
-	u64 pte_addr = gk20a_mem_get_base_addr(g, entry_mem, 0);
+	u64 pte_addr = gk20a_pde_addr(g, entry);
 	u32 pde1_bits =
-		gk20a_aperture_mask(g, entry_mem,
+		gk20a_aperture_mask(g, &entry->mem,
 			gmmu_pde_aperture_small_sys_mem_ncoh_f(),
 			gmmu_pde_aperture_small_video_memory_f()) |
 		gmmu_pde_vol_small_true_f() | /* tbd: why? */
@@ -3536,11 +3586,11 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 
 	pde_v[0] = gmmu_pde_size_full_f();
 	pde_v[0] |= big_valid ?
-		big_valid_pde0_bits(g, &entry->mem) :
+		big_valid_pde0_bits(g, entry) :
 		gmmu_pde_aperture_big_invalid_f();
 
 	pde_v[1] |= (small_valid ?
-		small_valid_pde1_bits(g, &entry->mem) :
+		small_valid_pde1_bits(g, entry) :
 		(gmmu_pde_aperture_small_invalid_f() |
 		 gmmu_pde_vol_small_false_f()))
 		|
@@ -3549,8 +3599,8 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 
 	pde = pde_from_index(i);
 
-	gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]);
-	gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]);
+	gk20a_pde_wr32(g, &vm->pdb, pde + 0, pde_v[0]);
+	gk20a_pde_wr32(g, &vm->pdb, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -3633,8 +3683,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 0, pte_w[0]);
-	gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 1, pte_w[1]);
+	gk20a_pde_wr32(g, pte, pte_from_index(i) + 0, pte_w[0]);
+	gk20a_pde_wr32(g, pte, pte_from_index(i) + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
@@ -3678,6 +3728,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 	int err = 0;
 	u32 pde_i;
 	u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
+	struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL;
 
 	gk20a_dbg_fn("");
 
@@ -3688,7 +3739,6 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
 
 	while (gpu_va < gpu_end) {
-		struct gk20a_mm_entry *next_pte = NULL;
 		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
 
 		/* Allocate next level */
@@ -3706,11 +3756,12 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			pte->pgsz = pgsz_idx;
 			pte->num_entries = num_entries;
 		}
+		prev_pte = next_pte;
 		next_pte = pte->entries + pde_i;
 
 		if (!next_pte->mem.size) {
 			err = gk20a_zalloc_gmmu_page_table(vm,
-				pgsz_idx, next_l, next_pte);
+				pgsz_idx, next_l, next_pte, prev_pte);
 			if (err)
 				return err;
 		}
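The hunk above is the caller side of the suballocation: next_pte now survives across loop iterations so each newly allocated lower-level table can be offered its predecessor as a donor. A compressed sketch of that chaining pattern follows; alloc_table(), struct tbl, and fill_level() are hypothetical stand-ins for gk20a_zalloc_gmmu_page_table() and its caller, kept only to show the prev/next hand-off.

/* Sketch of the prev/next chaining: each iteration hands the previously
 * allocated entry to the allocator as a possible suballocation donor. */
#include <stddef.h>

struct tbl { int allocated; };

/* Hypothetical stand-in for gk20a_zalloc_gmmu_page_table(vm, ..., entry, prev). */
static int alloc_table(struct tbl *entry, struct tbl *prev)
{
	(void)prev;		/* a real allocator would try to suballocate from prev */
	entry->allocated = 1;
	return 0;
}

int fill_level(struct tbl *entries, size_t n)
{
	struct tbl *next = NULL, *prev = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		prev = next;		/* last iteration's table becomes the donor */
		next = &entries[i];
		if (!next->allocated) {
			int err = alloc_table(next, prev);
			if (err)
				return err;
		}
	}
	return 0;
}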
@@ -4203,7 +4254,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		name, vm->va_limit, pde_hi + 1);
 
 	/* allocate the page table directory */
-	err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0], &vm->pdb);
+	err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0],
+			&vm->pdb, NULL);
 	if (err)
 		goto clean_up_pdes;
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index ee2bb61e..fe10b046 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -183,6 +183,7 @@ struct gk20a_comptags {
 struct gk20a_mm_entry {
 	/* backing for */
 	struct mem_desc mem;
+	u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */
 	int pgsz;
 	struct gk20a_mm_entry *entries;
 	int num_entries;
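The comment on woffset also encodes the ownership rule used by free_gmmu_pages() at the top of this patch: an entry with a non-zero woffset only aliases memory owned by an earlier sibling, so it must never free it. A hedged sketch of that rule, with hypothetical types that are not driver code:

/* Ownership sketch: only the entry that actually allocated the backing
 * memory (woffset == 0) releases it; shadow entries just drop their view. */
#include <stdlib.h>

struct entry {
	void *mem;		/* backing allocation, possibly shared */
	size_t size;		/* 0 means nothing to free */
	unsigned woffset;	/* >0: mem is owned by another entry */
};

static void free_entry(struct entry *e)
{
	if (!e->size)
		return;
	if (e->woffset) {	/* fake shadow mem, owned elsewhere */
		e->size = 0;
		return;
	}
	free(e->mem);		/* real owner frees exactly once */
	e->mem = NULL;
	e->size = 0;
}

int main(void)
{
	struct entry owner = { .mem = malloc(4096), .size = 4096, .woffset = 0 };
	struct entry shadow = { .mem = owner.mem, .size = 4096, .woffset = 64 };

	free_entry(&shadow);	/* no-op on the backing memory */
	free_entry(&owner);	/* actually frees it */
	return 0;
}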
@@ -631,6 +632,10 @@ u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture,
 u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem,
 		u32 sysmem_mask, u32 vidmem_mask);
 
+void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry,
+		size_t w, size_t data);
+u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry);
+
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	u64 map_offset,
 	struct sg_table *sgt,