From ac0cd782ab539d3a89ac2cc50955f80b8be456d1 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Thu, 29 Jan 2015 11:29:56 -0800
Subject: gpu: nvgpu: gp10b: Implement new page table format

Implement the 5-level Pascal page table format. It is enabled only for
simulation.

Change-Id: I6767fac8b52fe0f6a2e2f86312de5fc93af6518e
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/682114
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 197 +++++++++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)

(limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index e3e2c173..1608b176 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -20,6 +20,8 @@
 #include "rpfb_gp10b.h"
 #include "hw_ram_gp10b.h"
 #include "hw_bus_gp10b.h"
+#include "hw_gmmu_gp10b.h"
+#include "gk20a/semaphore_gk20a.h"
 
 static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
 {
@@ -138,6 +140,197 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
+u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+{
+	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_pde__size_v());
+}
+
+static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
+		struct gk20a_mm_entry *parent,
+		u32 i, u32 gmmu_pgsz_idx,
+		u64 iova,
+		u32 kind_v, u32 *ctag,
+		bool cacheable, bool unmapped_pte,
+		int rw_flag, bool sparse, u32 flags)
+{
+	u64 pte_addr = 0;
+	u64 pde_addr = 0;
+	struct gk20a_mm_entry *pte = parent->entries + i;
+	u32 pde_v[2] = {0, 0};
+	u32 *pde;
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0)
+		>> gmmu_new_pde_address_shift_v();
+	pde_addr = g->ops.mm.get_iova_addr(g, parent->sgt->sgl, 0);
+
+	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
+	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
+
+	pde = pde3_from_index(parent, i);
+
+	gk20a_mem_wr32(pde, 0, pde_v[0]);
+	gk20a_mem_wr32(pde, 1, pde_v[1]);
+
+	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
+		i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+{
+	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_dual_pde__size_v());
+}
+
+static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
+		struct gk20a_mm_entry *pte,
+		u32 i, u32 gmmu_pgsz_idx,
+		u64 iova,
+		u32 kind_v, u32 *ctag,
+		bool cacheable, bool unmapped_pte,
+		int rw_flag, bool sparse, u32 flags)
+{
+	bool small_valid, big_valid;
+	u32 pte_addr_small = 0, pte_addr_big = 0;
+	struct gk20a_mm_entry *entry = pte->entries + i;
+	u32 pde_v[4] = {0, 0, 0, 0};
+	u32 *pde;
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
+	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
+
+	if (small_valid)
+		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+			>> gmmu_new_dual_pde_address_shift_v();
+
+	if (big_valid)
+		pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+			>> gmmu_new_dual_pde_address_big_shift_v();
+
+	if (small_valid) {
+		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
+		pde_v[2] |= gmmu_new_dual_pde_aperture_small_video_memory_f();
+		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
+	}
+
+	if (big_valid) {
+		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
+		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
+		pde_v[0] |= gmmu_new_dual_pde_aperture_big_video_memory_f();
+	}
+
+	pde = pde0_from_index(pte, i);
+
+	gk20a_mem_wr32(pde, 0, pde_v[0]);
+	gk20a_mem_wr32(pde, 1, pde_v[1]);
+	gk20a_mem_wr32(pde, 2, pde_v[2]);
+	gk20a_mem_wr32(pde, 3, pde_v[3]);
+
+	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
+		i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+static int update_gmmu_pte_locked(struct vm_gk20a *vm,
+		struct gk20a_mm_entry *pte,
+		u32 i, u32 gmmu_pgsz_idx,
+		u64 iova,
+		u32 kind_v, u32 *ctag,
+		bool cacheable, bool unmapped_pte,
+		int rw_flag, bool sparse, u32 flags)
+{
+	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
+	u32 pte_w[2] = {0, 0}; /* invalid pte */
+
+	gk20a_dbg_fn("");
+
+	if (iova) {
+		pte_w[0] = gmmu_new_pte_valid_true_f() |
+			gmmu_new_pte_address_sys_f(iova
+				>> gmmu_new_pte_address_shift_v());
+		pte_w[1] = gmmu_new_pte_aperture_video_memory_f() |
+			gmmu_new_pte_kind_f(kind_v) |
+			gmmu_new_pte_comptagline_f(*ctag / SZ_128K);
+
+		if (rw_flag == gk20a_mem_flag_read_only)
+			pte_w[0] |= gmmu_new_pte_read_only_true_f();
+		if (!cacheable)
+			pte_w[1] |= gmmu_new_pte_vol_true_f();
+
+		gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
+			" ctag=%d vol=%d"
+			" [0x%08x, 0x%08x]",
+			i, iova,
+			kind_v, *ctag, !cacheable,
+			pte_w[1], pte_w[0]);
+
+		if (*ctag)
+			*ctag += page_size;
+	} else if (sparse) {
+		pte_w[0] = gmmu_new_pte_valid_false_f();
+		pte_w[1] |= gmmu_new_pte_vol_true_f();
+	} else {
+		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
+	}
+
+	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
+	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+const struct gk20a_mmu_level gp10b_mm_levels[] = {
+	{.hi_bit = {48, 48},
+	 .lo_bit = {47, 47},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {46, 46},
+	 .lo_bit = {38, 38},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {37, 37},
+	 .lo_bit = {29, 29},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {28, 28},
+	 .lo_bit = {21, 21},
+	 .update_entry = update_gmmu_pde0_locked,
+	 .entry_size = 16},
+	{.hi_bit = {20, 20},
+	 .lo_bit = {12, 16},
+	 .update_entry = update_gmmu_pte_locked,
+	 .entry_size = 8},
+	{.update_entry = NULL}
+};
+
+const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, u32 big_page_size)
+{
+	return gp10b_mm_levels;
+}
+
+static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+{
+	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
+	u32 pdb_addr_hi = u64_hi32(pdb_addr);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+		ram_in_page_dir_base_target_vid_mem_f() |
+		ram_in_page_dir_base_vol_true_f() |
+		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
+		1 << 10);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+		ram_in_page_dir_base_hi_f(pdb_addr_hi));
+}
+
 void gp10b_init_mm(struct gpu_ops *gops)
 {
 	gm20b_init_mm(gops);
@@ -146,4 +339,8 @@ void gp10b_init_mm(struct gpu_ops *gops)
 	gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
 	gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
 	gops->mm.get_iova_addr = gp10b_mm_iova_addr;
+	if (tegra_platform_is_linsim()) {
+		gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
+		gops->mm.init_pdb = gp10b_mm_init_pdb;
+	}
 }
-- 
cgit v1.2.2
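
Editor's note: the gp10b_mm_levels table in the patch describes the five-level
walk by the virtual-address bit range each level decodes (48:47, 46:38, 37:29,
28:21, and 20:12 for small pages / 20:16 for big pages). Below is a minimal,
standalone sketch of how those ranges turn a GPU virtual address into per-level
entry indices. The struct, helper, and example address are illustrative only
and are not part of the driver or this patch.

#include <stdint.h>
#include <stdio.h>

/* Bit ranges copied from the .hi_bit/.lo_bit pairs in gp10b_mm_levels,
 * using the small-page (4K) PTE level. Names here are illustrative. */
struct level_bits { unsigned hi, lo; };

static const struct level_bits levels_4k[] = {
	{48, 47},	/* PDE3 root level, 8-byte entries  */
	{46, 38},	/* next level,      8-byte entries  */
	{37, 29},	/* next level,      8-byte entries  */
	{28, 21},	/* dual PDE level,  16-byte entries */
	{20, 12},	/* PTE level (4K),  8-byte entries  */
};

/* Extract the index a given level uses from a GPU virtual address. */
static uint32_t level_index(uint64_t gpu_va, struct level_bits b)
{
	uint64_t mask = (1ULL << (b.hi - b.lo + 1)) - 1;

	return (uint32_t)((gpu_va >> b.lo) & mask);
}

int main(void)
{
	uint64_t va = 0x0001234567896000ULL;	/* arbitrary 49-bit GPU VA */
	unsigned i;

	for (i = 0; i < 5; i++)
		printf("level %u index = %u\n", i,
		       (unsigned)level_index(va, levels_4k[i]));
	return 0;
}

The per-level index, scaled by the level's entry_size, is what functions such
as pde3_from_index() and pde0_from_index() use to locate the entry to write
within a directory page.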