/* * GM20B MMU * * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. */ #include #include "gk20a/gk20a.h" #include "mm_gm20b.h" #include "hw_gmmu_gm20b.h" #include "hw_fb_gm20b.h" #include "hw_gr_gm20b.h" #include "hw_ram_gm20b.h" static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, enum gmmu_pgsz_gk20a pgsz_idx, u64 first_vaddr, u64 last_vaddr, bool clear, bool refplus) { int err; u32 pte_lo, pte_hi; u32 pde_lo, pde_hi; u32 pte_w[2] = {0, 0}; /* invalid pte */ u64 addr = 0; u32 pte_cur; void *pte_kv_cur; struct page_table_gk20a *pte; struct gk20a *g = gk20a_from_vm(vm); gk20a_dbg_fn(""); pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, &pde_lo, &pde_hi); gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", pgsz_idx, pde_lo, pde_hi); /* Expect ptes of the same pde */ BUG_ON(pde_lo != pde_hi); pte = vm->pdes.ptes[pgsz_idx] + pde_lo; pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); /* get cpu access to the ptes */ err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur, pte->size); if (err) goto fail; gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { pte_w[0] = gmmu_pte_valid_false_f(); pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f(); gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=%llx" " [0x%08x,0x%08x]", pte_cur, addr, pte_w[1], pte_w[0]); gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]); gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]); } unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur); smp_mb(); g->ops.mm.tlb_invalidate(vm); return 0; fail: return err; } static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo, u64 vaddr_hi, u32 pde) { u64 pde_vaddr_lo, pde_vaddr_hi; gk20a_dbg_fn(""); pde_vaddr_lo = (u64)pde << vm->pde_stride_shift; pde_vaddr_hi = pde_vaddr_lo | ((0x1UL << (vm->pde_stride_shift)) - 1); return ((vaddr_lo <= pde_vaddr_lo) && (vaddr_hi) >= pde_vaddr_hi); } static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, u32 num_pages, u32 pgsz_idx, bool refplus) { struct mm_gk20a *mm = vm->mm; u32 pgsz = vm->gmmu_page_sizes[pgsz_idx]; u32 pde_shift = vm->pde_stride_shift; u64 vaddr_hi; u64 vaddr_pde_start; u32 i; u32 pde_lo, pde_hi; int err; gk20a_dbg_fn(""); vaddr_hi = vaddr + pgsz * (u64)num_pages - 1; pde_range_from_vaddr_range(vm, vaddr, vaddr_hi, &pde_lo, &pde_hi); gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", vaddr, vaddr_hi, pde_lo, pde_hi, pgsz, vm->pde_stride_shift); for (i = pde_lo; i <= pde_hi; i++) { /* Mark all ptes as sparse. */ err = validate_gmmu_page_table_gk20a_locked(vm, i, pgsz_idx); if (err) { gk20a_err(dev_from_vm(vm), "failed to validate page table %d: %d", i, err); goto fail; } if (gm20b_vm_is_pde_in_range(vm, vaddr, vaddr_hi, i)) { /* entire pde is marked as sparse */ vaddr_pde_start = (u64)i << pde_shift; allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr_pde_start, PDE_ADDR_END(vaddr_pde_start, pde_shift), false, refplus); } else { /* Check leading and trailing spaces which doesn't fit * into entire pde. */ if (pde_lo == pde_hi) allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, vaddr_hi, false, refplus); else if (i == pde_lo) allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, PDE_ADDR_END(vaddr, pde_shift), false, refplus); else allocate_gmmu_ptes_sparse(vm, pgsz_idx, PDE_ADDR_START(vaddr_hi, pde_shift), vaddr_hi, false, refplus); } } gk20a_mm_l2_flush(mm->g, true); return 0; fail: WARN_ON(1); return err; } static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, const unsigned int msec) { unsigned long timeout; if (tegra_platform_is_silicon()) timeout = jiffies + msecs_to_jiffies(msec); else timeout = msecs_to_jiffies(msec); while (1) { u32 val; val = gk20a_readl(g, fb_mmu_vpr_info_r()); if (fb_mmu_vpr_info_fetch_v(val) == fb_mmu_vpr_info_fetch_false_v()) break; if (tegra_platform_is_silicon()) { if (WARN_ON(time_after(jiffies, timeout))) return -ETIME; } else if (--timeout == 0) return -ETIME; } return 0; } int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g) { int ret = 0; gk20a_busy_noresume(g->dev); #ifdef CONFIG_PM_RUNTIME if (!pm_runtime_active(&g->dev->dev)) goto fail; #endif if (gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) { ret = -ETIME; goto fail; } gk20a_writel(g, fb_mmu_vpr_info_r(), fb_mmu_vpr_info_fetch_true_v()); ret = gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT); fail: pm_runtime_put(&g->dev->dev); return ret; } static bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g) { u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r()); return gr_gpcs_pri_mmu_debug_ctrl_debug_v(debug_ctrl) == gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v(); } static void gm20b_mm_set_big_page_size(struct gk20a *g, void *inst_ptr, int size) { u32 val; gk20a_dbg_fn(""); gk20a_dbg_info("big page size %d\n", size); val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w()); val &= ~ram_in_big_page_size_m(); if (size == SZ_64K) val |= ram_in_big_page_size_64kb_f(); else val |= ram_in_big_page_size_128kb_f(); gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val); gk20a_dbg_fn("done"); } static u32 gm20b_mm_get_big_page_sizes(void) { return SZ_64K | SZ_128K; } void gm20b_init_mm(struct gpu_ops *gops) { gops->mm.set_sparse = gm20b_vm_put_sparse; gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; gops->mm.gmmu_map = gk20a_locked_gmmu_map; gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; gops->mm.vm_remove = gk20a_vm_remove_support; gops->mm.vm_alloc_share = gk20a_vm_alloc_share; gops->mm.vm_bind_channel = gk20a_vm_bind_channel; gops->mm.fb_flush = gk20a_mm_fb_flush; gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; gops->mm.l2_flush = gk20a_mm_l2_flush; gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; }