From 4afc6a1659ec058fd44953ccff7a1030275bcc92 Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Mon, 19 Sep 2016 09:24:13 +0300
Subject: gpu: nvgpu: compact pte buffers

The lowest page table level may hold very few entries for mappings of
large pages, but a new page is allocated for each list of entries at
the lowest level, wasting memory and performance. Compact these so
that the new "allocation" of ptes is appended at the end of the
previous allocation, if there is space.

Bug 1736604

Change-Id: I4c7c4cad9019de202325750aee6034076e7e61c2
Signed-off-by: Konsta Holtta
Reviewed-on: http://git-master/r/1222810
(cherry picked from commit 97303ecc946c17150496486a2f52bd481311dbf7)
Reviewed-on: http://git-master/r/1234995
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 39 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 25 deletions(-)

(limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 03bab121..1e073ab2 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -151,18 +151,6 @@ static u32 pte3_from_index(u32 i)
 	return i * gmmu_new_pte__size_v() / sizeof(u32);
 }
 
-static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-	u64 addr;
-
-	if (g->mm.has_physical_mode)
-		addr = sg_phys(entry->mem.sgt->sgl);
-	else
-		addr = gk20a_mem_get_base_addr(g, &entry->mem, 0);
-
-	return addr;
-}
-
 static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *parent,
 			   u32 i, u32 gmmu_pgsz_idx,
@@ -176,15 +164,13 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 pte_addr = 0;
-	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 pde;
 
 	gk20a_dbg_fn("");
 
-	pte_addr = entry_addr(g, pte) >> gmmu_new_pde_address_shift_v();
-	pde_addr = entry_addr(g, parent);
+	pte_addr = gk20a_pde_addr(g, pte) >> gmmu_new_pde_address_shift_v();
 
 	pde_v[0] |= gk20a_aperture_mask(g, &pte->mem,
 			gmmu_new_pde_aperture_sys_mem_ncoh_f(),
@@ -194,8 +180,8 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[1] |= pte_addr >> 24;
 	pde = pde3_from_index(i);
 
-	gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]);
-	gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]);
+	gk20a_pde_wr32(g, parent, pde + 0, pde_v[0]);
+	gk20a_pde_wr32(g, parent, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -232,12 +218,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid) {
-		pte_addr_small = entry_addr(g, entry)
+		pte_addr_small = gk20a_pde_addr(g, entry)
 				>> gmmu_new_dual_pde_address_shift_v();
 	}
 
 	if (big_valid)
-		pte_addr_big = entry_addr(g, entry)
+		pte_addr_big = gk20a_pde_addr(g, entry)
 				>> gmmu_new_dual_pde_address_big_shift_v();
 
 	if (small_valid) {
@@ -260,10 +246,10 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 
 	pde = pde0_from_index(i);
 
-	gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]);
+	gk20a_pde_wr32(g, pte, pde + 0, pde_v[0]);
+	gk20a_pde_wr32(g, pte, pde + 1, pde_v[1]);
+	gk20a_pde_wr32(g, pte, pde + 2, pde_v[2]);
+	gk20a_pde_wr32(g, pte, pde + 3, pde_v[3]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
 		  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
@@ -286,6 +272,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 	u64 ctag_granularity = g->ops.fb.compression_page_size(g);
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
+	u32 pte_i;
 
 	if (*iova) {
 		u32 pte_valid = unmapped_pte ?
@@ -331,8 +318,10 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]);
-	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]);
+	pte_i = pte3_from_index(i);
+
+	gk20a_pde_wr32(g, pte, pte_i + 0, pte_w[0]);
+	gk20a_pde_wr32(g, pte, pte_i + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
-- 
cgit v1.2.2
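
Editor's note: this diff only switches the gp10b code from the local
entry_addr()/gk20a_mem_wr32() pair to the common gk20a_pde_addr() and
gk20a_pde_wr32() helpers; the helper bodies, and the compaction logic
itself, live outside this file and are not shown here. To make the
commit message concrete, below is a minimal sketch of the idea in
plain C. Every name in it (pte_pool, pte_alloc, POOL_PAGE_SIZE) is
hypothetical, invented for illustration, and not part of nvgpu. The
point is simply that a request for a small lowest-level PTE list
reuses the tail of the previously allocated page when it still has
room, instead of always taking a fresh page:

#include <stdint.h>
#include <stdlib.h>

#define POOL_PAGE_SIZE 4096u	/* hypothetical backing page size */

/* Hypothetical pool state: one partially consumed backing page. */
struct pte_pool {
	uint8_t *page;	/* current backing page, NULL if none yet */
	size_t used;	/* bytes already handed out from it */
};

/*
 * Hand out 'bytes' of PTE storage. If the previous page still has
 * room, append the new "allocation" at its end; otherwise fall back
 * to allocating a fresh page, as the old code always did. Previous
 * pages are assumed to be tracked and freed elsewhere.
 */
static void *pte_alloc(struct pte_pool *pool, size_t bytes)
{
	void *p;

	if (bytes > POOL_PAGE_SIZE)
		return NULL;	/* sketch only handles sub-page lists */

	if (!pool->page || pool->used + bytes > POOL_PAGE_SIZE) {
		pool->page = calloc(1, POOL_PAGE_SIZE);
		if (!pool->page)
			return NULL;
		pool->used = 0;
	}

	p = pool->page + pool->used;
	pool->used += bytes;
	return p;
}

Mappings of large pages need only a handful of lowest-level PTEs, so
each list is far smaller than a page; packing consecutive lists into
one page is what recovers the memory and allocation overhead the
commit message describes. Consistent with this, the new helpers take
the gk20a_mm_entry itself rather than &entry->mem, which lets them
apply a per-entry offset within a shared buffer when computing
addresses and writing words; their actual implementation is in the
common gk20a code, not in this patch.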