From 18939e4b4a680a8e01bbec864b267e193fb3714d Mon Sep 17 00:00:00 2001
From: Kevin Huang
Date: Wed, 23 Apr 2014 15:39:00 -0700
Subject: gpu: nvgpu: add support for Maxwell sparse texture

Bug 1442531

Change-Id: Ie927cca905b2ea9811417e7a1fdfdf9d48f015e2
Signed-off-by: Kevin Huang
---
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 218 +++++++++++++++++++++++++++++++++++++
 1 file changed, 218 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gm20b/mm_gm20b.c

diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
new file mode 100644
index 00000000..67d61569
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -0,0 +1,218 @@
+/*
+ * GM20B MMU
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "mm_gm20b.h"
+#include "hw_gmmu_gm20b.h"
+
+static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
+static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
+static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
+                                                        0x1ffffLL };
+static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
+
+static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
+                                enum gmmu_pgsz_gk20a pgsz_idx,
+                                u64 first_vaddr, u64 last_vaddr)
+{
+        int err;
+        u32 pte_lo, pte_hi;
+        u32 pde_lo, pde_hi;
+        u32 pte_w[2] = {0, 0}; /* invalid pte */
+        u64 addr = 0;
+        u32 pte_cur;
+        void *pte_kv_cur;
+        struct page_table_gk20a *pte;
+
+        gk20a_dbg_fn("");
+
+        pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
+                                        &pde_lo, &pde_hi);
+
+        gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
+                        pgsz_idx, pde_lo, pde_hi);
+
+        /* Expect ptes of the same pde */
+        BUG_ON(pde_lo != pde_hi);
+
+        pte = vm->pdes.ptes[pgsz_idx] + pde_lo;
+
+        pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
+        pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);
+
+        /* get cpu access to the ptes */
+        err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur, pte->size);
+        if (err)
+                goto fail;
+
+        gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
+        for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
+                pte_w[0] = gmmu_pte_valid_false_f();
+                pte_w[1] = gmmu_pte_vol_true_f();
+
+                gk20a_dbg(gpu_dbg_pte,
+                        "pte_cur=%d addr=%llx refs=%d"
+                        " [0x%08x,0x%08x]",
+                        pte_cur, addr,
+                        pte->ref_cnt, pte_w[1], pte_w[0]);
+
+                gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
+                gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
+        }
+
+        unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
+
+        smp_mb();
+        vm->tlb_dirty = true;
+        gk20a_dbg_fn("set tlb dirty");
+
+        return 0;
+fail:
+        return err;
+}
+
+static void allocate_gmmu_pde_sparse(struct vm_gk20a *vm, u32 i)
+{
+        bool small_valid, big_valid;
+        u64 pte_addr[2] = {0, 0};
+        struct page_table_gk20a *small_pte =
+                vm->pdes.ptes[gmmu_page_size_small] + i;
+        struct page_table_gk20a *big_pte =
+                vm->pdes.ptes[gmmu_page_size_big] + i;
+        u32 pde_v[2] = {0, 0};
+        u32 *pde;
+
+        gk20a_dbg_fn("");
+
+        small_valid = small_pte && small_pte->ref;
+        big_valid = big_pte && big_pte->ref;
+
+        if (small_valid)
+                pte_addr[gmmu_page_size_small] =
+                        gk20a_mm_iova_addr(small_pte->sgt->sgl);
+        if (big_valid)
+                pte_addr[gmmu_page_size_big] =
+                        gk20a_mm_iova_addr(big_pte->sgt->sgl);
+
+        pde_v[0] = gmmu_pde_size_full_f();
+        pde_v[0] |= gmmu_pde_aperture_big_invalid_f();
+        pde_v[1] |= gmmu_pde_aperture_small_invalid_f() |
+                gmmu_pde_vol_big_true_f();
+
+        pde = pde_from_index(vm, i);
+
+        gk20a_mem_wr32(pde, 0, pde_v[0]);
+        gk20a_mem_wr32(pde, 1, pde_v[1]);
+
+        smp_mb();
+
+        FLUSH_CPU_DCACHE(pde,
+                        sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
+                        sizeof(u32)*2);
+
+        gk20a_mm_l2_invalidate(vm->mm->g);
+
+        gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
+
+        vm->tlb_dirty = true;
+}
+
+static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo,
+                                u64 vaddr_hi, u32 pde)
+{
+        u64 pde_vaddr_lo, pde_vaddr_hi;
+
+        gk20a_dbg_fn("");
+
+        pde_vaddr_lo = (u64)pde << vm->mm->pde_stride_shift;
+        pde_vaddr_hi = pde_vaddr_lo |
+                        ((0x1UL << (vm->mm->pde_stride_shift)) - 1);
+
+        return (vaddr_lo <= pde_vaddr_lo) && (vaddr_hi >= pde_vaddr_hi);
+}
+
+static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
+                        u32 num_pages, u32 pgsz_idx)
+{
+        struct mm_gk20a *mm = vm->mm;
+        u32 pgsz = gmmu_page_sizes[pgsz_idx];
+        u32 pde_shift = vm->mm->pde_stride_shift;
+        u64 vaddr_hi;
+        u64 vaddr_pde_start;
+        u32 i;
+        u32 pde_lo, pde_hi;
+        int err;
+
+        gk20a_dbg_fn("");
+
+        vaddr_hi = vaddr + pgsz * num_pages - 1;
+        pde_range_from_vaddr_range(vm,
+                        vaddr,
+                        vaddr_hi,
+                        &pde_lo, &pde_hi);
+
+        gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
+                        "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
+                        vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
+                        vm->mm->pde_stride_shift);
+
+        for (i = pde_lo; i <= pde_hi; i++) {
+                /* Mark all ptes as sparse. */
+                err = validate_gmmu_page_table_gk20a_locked(vm, i,
+                                pgsz_idx);
+                if (err) {
+                        gk20a_err(dev_from_vm(vm),
+                                "failed to validate page table %d: %d",
+                                i, err);
+                        goto fail;
+                }
+
+                if (gm20b_vm_is_pde_in_range(vm, vaddr, vaddr_hi, i)) {
+                        /* entire pde is marked as sparse */
+                        vaddr_pde_start = (u64)i << pde_shift;
+                        allocate_gmmu_ptes_sparse(vm, pgsz_idx,
+                                vaddr_pde_start,
+                                PDE_ADDR_END(vaddr_pde_start, pde_shift));
+                } else {
+                        /* Check leading and trailing spaces which don't fit
+                         * into an entire pde. */
+                        if (pde_lo == pde_hi)
+                                allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
+                                                vaddr_hi);
+                        else if (i == pde_lo)
+                                allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
+                                        PDE_ADDR_END(vaddr, pde_shift));
+                        else
+                                allocate_gmmu_ptes_sparse(vm, pgsz_idx,
+                                        PDE_ADDR_START(vaddr_hi, pde_shift),
+                                        vaddr_hi);
+                }
+        }
+
+        gk20a_mm_l2_flush(mm->g, true);
+
+        return 0;
+
+fail:
+        WARN_ON(1);
+
+        return err;
+}
+
+void gm20b_init_mm(struct gpu_ops *gops)
+{
+        gops->mm.set_sparse = gm20b_vm_put_sparse;
+}

--
cgit v1.2.2
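
For reference, a minimal caller-side sketch of how the hook installed by gm20b_init_mm() could be driven. Only the g->ops.mm.set_sparse hook and its (vm, vaddr, num_pages, pgsz_idx) signature come from the patch above; the helper name reserve_sparse_va() and the error convention are hypothetical, and the "reads return zero, writes are dropped" wording describes the intent of the invalid+volatile PTE encoding the patch writes.

/* Hypothetical caller-side sketch -- not part of the patch above.
 * Assumes a VA range of num_pages pages of size gmmu_page_sizes[pgsz_idx]
 * has already been reserved in vm; the chip-specific hook (gm20b_vm_put_sparse()
 * on GM20B) then backs it with sparse PTEs, i.e. entries left invalid but
 * volatile so the range behaves as sparse (reads return zero, writes dropped).
 */
static int reserve_sparse_va(struct gk20a *g, struct vm_gk20a *vm,
                        u64 vaddr, u32 num_pages, u32 pgsz_idx)
{
        /* Chips without sparse support simply leave the hook unset. */
        if (!g->ops.mm.set_sparse)
                return -ENOSYS;

        return g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);
}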