Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r--	drivers/gpu/nvgpu/gp10b/mm_gp10b.c	446
1 file changed, 446 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
new file mode 100644
index 00000000..4b985af4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -0,0 +1,446 @@
/*
 * GP10B MMU
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/mm.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>

#include "gk20a/gk20a.h"
#include "gm20b/mm_gm20b.h"
#include "mm_gp10b.h"
#include "rpfb_gp10b.h"

#include <nvgpu/hw/gp10b/hw_fb_gp10b.h>
#include <nvgpu/hw/gp10b/hw_ram_gp10b.h>
#include <nvgpu/hw/gp10b/hw_bus_gp10b.h>
#include <nvgpu/hw/gp10b/hw_gmmu_gp10b.h>

u32 gp10b_mm_get_default_big_page_size(void)
{
	return SZ_64K;
}

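/*
 * Physical addresses with this bit set are routed through the SMMU
 * (IOMMU) rather than used as raw physical addresses.
 */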
u32 gp10b_mm_get_iommu_bit(struct gk20a *g)
{
	return 36;
}

int gp10b_init_mm_setup_hw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
	int err = 0;

	gk20a_dbg_fn("");

	g->ops.fb.set_mmu_page_size(g);

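	/* The flush target address is programmed in 256-byte units. */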
	gk20a_writel(g, fb_niso_flush_sysmem_addr_r(),
		     nvgpu_mem_get_addr(g, &g->mm.sysmem_flush) >> 8ULL);

	g->ops.bus.bar1_bind(g, inst_block);

	if (g->ops.mm.init_bar2_mm_hw_setup) {
		err = g->ops.mm.init_bar2_mm_hw_setup(g);
		if (err)
			return err;
	}

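	/* If the first FB flush fails, retry once before giving up. */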
	if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
		return -EBUSY;

	err = gp10b_replayable_pagefault_buffer_init(g);

	gk20a_dbg_fn("done");
	return err;
}

int gb10b_init_bar2_vm(struct gk20a *g)
{
	int err;
	struct mm_gk20a *mm = &g->mm;
	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
	u32 big_page_size = g->ops.mm.get_default_big_page_size();

	/* BAR2 aperture size is 32MB */
	mm->bar2.aperture_size = 32 << 20;
	gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size);

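	/* The BAR2 VM spans the 32 MB aperture with a 4K low hole at the bottom. */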
	mm->bar2.vm = nvgpu_vm_init(g, big_page_size, SZ_4K,
				    mm->bar2.aperture_size - SZ_4K,
				    mm->bar2.aperture_size, false, false, "bar2");
	if (!mm->bar2.vm)
		return -ENOMEM;

	/* allocate instance mem for bar2 */
	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		goto clean_up_va;

	g->ops.mm.init_inst_block(inst_block, mm->bar2.vm, big_page_size);

	return 0;

clean_up_va:
	nvgpu_vm_put(mm->bar2.vm);
	return err;
}

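/*
 * Program the BAR2 block register: point it at the BAR2 instance block
 * and enable virtual (GMMU-translated) BAR2 accesses.
 */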
int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
	u64 inst_pa = nvgpu_inst_block_addr(g, inst_block);

	gk20a_dbg_fn("");

	g->ops.fb.set_mmu_page_size(g);

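	/* The block pointer field takes the instance block address in 4K units. */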
	inst_pa = (u32)(inst_pa >> bus_bar2_block_ptr_shift_v());
	gk20a_dbg_info("bar2 inst block ptr: 0x%08x", (u32)inst_pa);

	gk20a_writel(g, bus_bar2_block_r(),
		     nvgpu_aperture_mask(g, inst_block,
					 bus_bar2_block_target_sys_mem_ncoh_f(),
					 bus_bar2_block_target_vid_mem_f()) |
		     bus_bar2_block_mode_virtual_f() |
		     bus_bar2_block_ptr_f(inst_pa));

	gk20a_dbg_fn("done");
	return 0;
}

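/*
 * Write an 8-byte PDE (two 32-bit words) for one of the upper
 * page-directory levels of the new (Pascal) page table format.
 */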
static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
				    const struct gk20a_mmu_level *l,
				    struct nvgpu_gmmu_pd *pd,
				    u32 pd_idx,
				    u64 virt_addr,
				    u64 phys_addr,
				    struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pde_v[2] = {0, 0};

	phys_addr >>= gmmu_new_pde_address_shift_v();

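	/* Bits 23:0 of the shifted address land in word 0, the rest in word 1. */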
	pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
					gmmu_new_pde_aperture_sys_mem_ncoh_f(),
					gmmu_new_pde_aperture_video_memory_f());
	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
	pde_v[0] |= gmmu_new_pde_vol_true_f();
	pde_v[1] |= phys_addr >> 24;

	pd_write(g, pd, pd_offset + 0, pde_v[0]);
	pd_write(g, pd, pd_offset + 1, pde_v[1]);

	pte_dbg(g, attrs,
		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | "
		"GPU %#-12llx phys %#-12llx "
		"[0x%08x, 0x%08x]",
		pd_idx, l->entry_size, pd_offset,
		virt_addr, phys_addr,
		pde_v[1], pde_v[0]);
}

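/*
 * PDE0 is a 16-byte dual entry: words 0/1 describe the big-page table,
 * words 2/3 the small-page table.
 */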
static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
				    const struct gk20a_mmu_level *l,
				    struct nvgpu_gmmu_pd *pd,
				    u32 pd_idx,
				    u64 virt_addr,
				    u64 phys_addr,
				    struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	bool small_valid, big_valid;
	u32 small_addr = 0, big_addr = 0;
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pde_v[4] = {0, 0, 0, 0};

	small_valid = attrs->pgsz == gmmu_page_size_small;
	big_valid = attrs->pgsz == gmmu_page_size_big;

	if (small_valid)
		small_addr = phys_addr >> gmmu_new_dual_pde_address_shift_v();

	if (big_valid)
		big_addr = phys_addr >> gmmu_new_dual_pde_address_big_shift_v();

	if (small_valid) {
		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(small_addr);
		pde_v[2] |= nvgpu_aperture_mask(g, pd->mem,
			gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
			gmmu_new_dual_pde_aperture_small_video_memory_f());
		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
		pde_v[3] |= small_addr >> 24;
	}

	if (big_valid) {
		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(big_addr);
		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
		pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
			gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
			gmmu_new_dual_pde_aperture_big_video_memory_f());
		pde_v[1] |= big_addr >> 28;
	}

	pd_write(g, pd, pd_offset + 0, pde_v[0]);
	pd_write(g, pd, pd_offset + 1, pde_v[1]);
	pd_write(g, pd, pd_offset + 2, pde_v[2]);
	pd_write(g, pd, pd_offset + 3, pde_v[3]);

	pte_dbg(g, attrs,
		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
		"GPU %#-12llx phys %#-12llx "
		"[0x%08x, 0x%08x, 0x%08x, 0x%08x]",
		pd_idx, l->entry_size, pd_offset,
		small_valid ? 'S' : '-',
		big_valid ? 'B' : '-',
		virt_addr, phys_addr,
		pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
}

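/* Assemble the two 32-bit words of a new-format PTE for a present mapping. */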
static void __update_pte(struct vm_gk20a *vm,
			 u32 *pte_w,
			 u64 phys_addr,
			 struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u64 ctag_granularity = g->ops.fb.compression_page_size(g);
	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
	u32 pte_valid = attrs->valid ?
		gmmu_new_pte_valid_true_f() :
		gmmu_new_pte_valid_false_f();
	u32 phys_shifted = phys_addr >> gmmu_new_pte_address_shift_v();
	u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ?
		gmmu_new_pte_address_sys_f(phys_shifted) :
		gmmu_new_pte_address_vid_f(phys_shifted);
	u32 pte_tgt = __nvgpu_aperture_mask(g,
		attrs->aperture,
		attrs->coherent ?
			gmmu_new_pte_aperture_sys_mem_coh_f() :
			gmmu_new_pte_aperture_sys_mem_ncoh_f(),
		gmmu_new_pte_aperture_video_memory_f());

	pte_w[0] = pte_valid | pte_addr | pte_tgt;

	if (attrs->priv)
		pte_w[0] |= gmmu_new_pte_privilege_true_f();

	pte_w[1] = phys_addr >> (24 + gmmu_new_pte_address_shift_v()) |
		gmmu_new_pte_kind_f(attrs->kind_v) |
		gmmu_new_pte_comptagline_f((u32)(attrs->ctag /
						 ctag_granularity));

	if (attrs->rw_flag == gk20a_mem_flag_read_only)
		pte_w[0] |= gmmu_new_pte_read_only_true_f();

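	/*
	 * An invalid, non-cacheable PTE is marked read-only; a valid
	 * non-cacheable PTE is instead marked volatile (uncached).
	 */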
	if (!attrs->valid && !attrs->cacheable)
		pte_w[0] |= gmmu_new_pte_read_only_true_f();
	else if (!attrs->cacheable)
		pte_w[0] |= gmmu_new_pte_vol_true_f();

	if (attrs->ctag)
		attrs->ctag += page_size;
}

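/* Sparse mappings are encoded as PTEs that are invalid but volatile. */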
static void __update_pte_sparse(u32 *pte_w)
{
	pte_w[0] = gmmu_new_pte_valid_false_f();
	pte_w[0] |= gmmu_new_pte_vol_true_f();
}

static void update_gmmu_pte_locked(struct vm_gk20a *vm,
				   const struct gk20a_mmu_level *l,
				   struct nvgpu_gmmu_pd *pd,
				   u32 pd_idx,
				   u64 virt_addr,
				   u64 phys_addr,
				   struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = vm->mm->g;
	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pte_w[2] = {0, 0};

	if (phys_addr)
		__update_pte(vm, pte_w, phys_addr, attrs);
	else if (attrs->sparse)
		__update_pte_sparse(pte_w);

	pte_dbg(g, attrs,
		"vm=%s "
		"PTE: i=%-4u size=%-2u | "
		"GPU %#-12llx phys %#-12llx "
		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c "
		"ctag=0x%08x "
		"[0x%08x, 0x%08x]",
		vm->name,
		pd_idx, l->entry_size,
		virt_addr, phys_addr,
		page_size >> 10,
		nvgpu_gmmu_perm_str(attrs->rw_flag),
		attrs->kind_v,
		nvgpu_aperture_str(attrs->aperture),
		attrs->cacheable ? 'C' : 'v',
		attrs->sparse ? 'S' : '-',
		attrs->priv ? 'P' : '-',
		attrs->coherent ? 'c' : '-',
		attrs->valid ? 'V' : '-',
		(u32)attrs->ctag / g->ops.fb.compression_page_size(g),
		pte_w[1], pte_w[0]);

	pd_write(g, pd, pd_offset + 0, pte_w[0]);
	pd_write(g, pd, pd_offset + 1, pte_w[1]);
}

#define GP10B_PDE0_ENTRY_SIZE	16

/*
 * Calculate the page size programmed into a PDE0 entry.
 *
 * Pascal+ implements a 5-level page table structure in which only the
 * last level has a different number of entries, depending on whether it
 * holds big pages or small pages.
 */
static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
		struct nvgpu_gmmu_pd *pd, u32 pd_idx)
{
	u32 pde_base = pd->mem_offs / sizeof(u32);
	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
	u32 i;
	enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;

	if (!pd->mem)
		return pgsz;

	nvgpu_mem_begin(g, pd->mem);
	for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
	nvgpu_mem_end(g, pd->mem);

	/*
	 * Check that both the aperture and the address are set. Note the
	 * bitwise OR of the two aperture field values, matching the
	 * big-page check below.
	 */
	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
			gmmu_new_dual_pde_aperture_small_video_memory_f())) {
		u64 addr = ((u64) (pde_v[2] &
				   gmmu_new_dual_pde_address_small_sys_f(~0)) <<
			    gmmu_new_dual_pde_address_shift_v()) |
			   ((u64) pde_v[3] << 32);

		if (addr)
			pgsz = gmmu_page_size_small;
	}

	if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
			gmmu_new_dual_pde_aperture_big_video_memory_f())) {
		u64 addr = ((u64) (pde_v[0] &
				   gmmu_new_dual_pde_address_big_sys_f(~0)) <<
			    gmmu_new_dual_pde_address_big_shift_v()) |
			   ((u64) pde_v[1] << 32);

		if (addr) {
			/*
			 * If small is already set, MM somehow allowed both
			 * small and big to be set; the PDE is not valid and
			 * may be corrupted.
			 */
			if (pgsz == gmmu_page_size_small) {
				nvgpu_err(g,
					"both small and big apertures enabled");
				return gmmu_nr_page_sizes;
			}
		}
		pgsz = gmmu_page_size_big;
	}

	return pgsz;
}

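/*
 * Five translation levels: four page-directory levels followed by the
 * PTE level. Only the PTE level's low bit differs by page size: bit 12
 * for 4K small pages, bit 16 for 64K big pages.
 */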
static const struct gk20a_mmu_level gp10b_mm_levels[] = {
	{.hi_bit = {48, 48},
	 .lo_bit = {47, 47},
	 .update_entry = update_gmmu_pde3_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {46, 46},
	 .lo_bit = {38, 38},
	 .update_entry = update_gmmu_pde3_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {37, 37},
	 .lo_bit = {29, 29},
	 .update_entry = update_gmmu_pde3_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {28, 28},
	 .lo_bit = {21, 21},
	 .update_entry = update_gmmu_pde0_locked,
	 .entry_size = GP10B_PDE0_ENTRY_SIZE,
	 .get_pgsz = gp10b_get_pde0_pgsz},
	{.hi_bit = {20, 20},
	 .lo_bit = {12, 16},
	 .update_entry = update_gmmu_pte_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pte_pgsz},
	{.update_entry = NULL}
};

const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
						      u32 big_page_size)
{
	return gp10b_mm_levels;
}

void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
		       struct vm_gk20a *vm)
{
	u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
	u32 pdb_addr_hi = u64_hi32(pdb_addr);

	gk20a_dbg_info("pde pa=0x%llx", pdb_addr);

	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
		nvgpu_aperture_mask(g, vm->pdb.mem,
			ram_in_page_dir_base_target_sys_mem_ncoh_f(),
			ram_in_page_dir_base_target_vid_mem_f()) |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
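		/* bit 10: use the new ("ver2") page table format */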
		1 << 10);

	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
		ram_in_page_dir_base_hi_f(pdb_addr_hi));
}

void gp10b_remove_bar2_vm(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;

	gp10b_replayable_pagefault_buffer_deinit(g);
	nvgpu_free_inst_block(g, &mm->bar2.inst_block);
	nvgpu_vm_put(mm->bar2.vm);
}