author    Alex Waterman <alexw@nvidia.com>  2017-10-06 14:30:29 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-10-24 18:16:49 -0400
commit    2a285d0607a20694476399f5719e74dbc26fcd58 (patch)
tree      ef0246e3ca7b933ce3ea4c74061f61cc2e394b8b /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent    748331cbab1c7af26ab1fbae5ead2cdaff22806a (diff)
gpu: nvgpu: Cleanup generic MM code in gk20a/mm_gk20a.c
Move much of the remaining generic MM code to a new common location:
common/mm/mm.c. Also add a corresponding <nvgpu/mm.h> header. This mostly
consists of init and cleanup code to handle the common MM data structures
like the VIDMEM code, address spaces for various engines, etc.

A few more in-depth changes were made as well:

  1. alloc_inst_block() has been added to the MM HAL. This used to be
     defined directly in the gk20a code, but it used a register; as a
     result, if that register hypothetically changes in the future, it
     would need to become a HAL anyway. This patch preempts that and for
     now just defines all HALs to use the gk20a version.

  2. Rename as much as possible: global functions are, for the most part,
     prepended with nvgpu (there are a few exceptions which I have yet to
     decide what to do with). Static functions are renamed to be as
     consistent with their functionality as possible, since in some cases
     function effect and function name have diverged.

JIRA NVGPU-30

Change-Id: Ic948f1ecc2f7976eba4bb7169a44b7226bb7c0b5
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1574499
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
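As a rough illustration of the HAL change described in item 1 (not part of this diff), the wiring might look like the sketch below. The struct and field names used here are assumptions; only gk20a_alloc_inst_block() itself appears in this change.

/*
 * Illustrative sketch only: the shape of an alloc_inst_block() HAL hook.
 * The struct and field names are assumed, not taken from the nvgpu tree.
 */
struct gk20a;
struct nvgpu_mem;

/* The gk20a implementation that every chip's HAL is pointed at for now. */
int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);

struct nvgpu_mm_hal_sketch {
	int (*alloc_inst_block)(struct gk20a *g, struct nvgpu_mem *inst_block);
};

/* A per-chip HAL init would simply reuse the gk20a version. */
static inline void sketch_init_mm_hal(struct nvgpu_mm_hal_sketch *mm)
{
	mm->alloc_inst_block = gk20a_alloc_inst_block;
}

Callers would then go through g->ops rather than calling the gk20a function directly, so a future register change only requires overriding this HAL entry for the affected chips.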
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  441
1 files changed, 18 insertions, 423 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d96fa4e1..a17d6bb6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,6 +1,4 @@
 /*
- * GK20A memory management
- *
  * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,6 +22,7 @@
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
 #include <nvgpu/dma.h>
@@ -88,161 +87,6 @@
  *
  */
 
-static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
-
-static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-	if (g->ops.fb.reset)
-		g->ops.fb.reset(g);
-
-	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
-		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
-		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
-		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
-				g->blcg_enabled);
-	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
-		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
-				g->blcg_enabled);
-
-	if (g->ops.fb.init_fs_state)
-		g->ops.fb.init_fs_state(g);
-
-	return 0;
-}
-
-static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (mm->vidmem.ce_ctx_id != (u32)~0)
-		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	nvgpu_vm_put(mm->ce.vm);
-}
-
-static void gk20a_remove_mm_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (g->ops.mm.fault_info_mem_destroy)
-		g->ops.mm.fault_info_mem_destroy(g);
-
-	if (g->ops.mm.remove_bar2_vm)
-		g->ops.mm.remove_bar2_vm(g);
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		gk20a_free_inst_block(g, &mm->bar1.inst_block);
-		nvgpu_vm_put(mm->bar1.vm);
-	}
-
-	gk20a_free_inst_block(g, &mm->pmu.inst_block);
-	gk20a_free_inst_block(g, &mm->hwpm.inst_block);
-	nvgpu_vm_put(mm->pmu.vm);
-	nvgpu_vm_put(mm->cde.vm);
-
-	nvgpu_semaphore_sea_destroy(g);
-	nvgpu_vidmem_destroy(g);
-	nvgpu_pd_cache_fini(g);
-}
-
-static int gk20a_alloc_sysmem_flush(struct gk20a *g)
-{
-	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
-}
-
-int gk20a_init_mm_setup_sw(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (mm->sw_ready) {
-		gk20a_dbg_fn("skip init");
-		return 0;
-	}
-
-	mm->g = g;
-	nvgpu_mutex_init(&mm->l2_op_lock);
-
-	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
-		NV_MM_DEFAULT_KERNEL_SIZE;
-	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
-
-	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
-		       (int)(mm->channel.user_size >> 20),
-		       (int)(mm->channel.kernel_size >> 20));
-
-	nvgpu_init_pramin(mm);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	err = nvgpu_vidmem_init(mm);
-	if (err)
-		return err;
-
-	/*
-	 * this requires fixed allocations in vidmem which must be
-	 * allocated before all other buffers
-	 */
-	if (g->ops.pmu.alloc_blob_space
-			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
-		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
-		if (err)
-			return err;
-	}
-
-	err = gk20a_alloc_sysmem_flush(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		err = gk20a_init_bar1_vm(mm);
-		if (err)
-			return err;
-	}
-	if (g->ops.mm.init_bar2_vm) {
-		err = g->ops.mm.init_bar2_vm(g);
-		if (err)
-			return err;
-	}
-	err = gk20a_init_system_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_hwpm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_cde_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_ce_vm(mm);
-	if (err)
-		return err;
-
-	mm->remove_support = gk20a_remove_mm_support;
-	mm->remove_ce_support = gk20a_remove_mm_ce_support;
-
-	mm->sw_ready = true;
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 /* make sure gk20a_init_mm_support is called before */
 int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
@@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-int gk20a_init_mm_support(struct gk20a *g)
-{
-	u32 err;
-
-	err = gk20a_init_mm_reset_enable_hw(g);
-	if (err)
-		return err;
-
-	err = gk20a_init_mm_setup_sw(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.init_mm_setup_hw)
-		err = g->ops.mm.init_mm_setup_hw(g);
-
-	return err;
-}
-
-void gk20a_init_mm_ce_context(struct gk20a *g)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-	if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
-		g->mm.vidmem.ce_ctx_id =
-			gk20a_ce_create_context_with_cb(g,
-					gk20a_fifo_get_fast_ce_runlist_id(g),
-					-1,
-					-1,
-					-1,
-					NULL);
-
-		if (g->mm.vidmem.ce_ctx_id == (u32)~0)
-			nvgpu_err(g,
-				"Failed to allocate CE context for vidmem page clearing support");
-	}
-#endif
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.update_entry = NULL}
 };
 
-/*
- * Attempt to find a reserved memory area to determine PTE size for the passed
- * mapping. If no reserved area can be found use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-					      u64 base, u64 size)
-{
-	struct nvgpu_vm_area *vm_area;
-
-	vm_area = nvgpu_vm_area_find(vm, base);
-	if (!vm_area)
-		return gmmu_page_size_small;
-
-	return vm_area->pgsz_idx;
-}
-
-/*
- * This is for when the address space does not support unified address spaces.
- */
-static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
-						      u64 base, u64 size)
-{
-	if (!base) {
-		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-			return gmmu_page_size_big;
-		return gmmu_page_size_small;
-	} else {
-		if (base < __nv_gmmu_va_small_page_limit())
-			return gmmu_page_size_small;
-		else
-			return gmmu_page_size_big;
-	}
-}
-
-/*
- * This determines the PTE size for a given alloc. Used by both the GVA space
- * allocator and the mm core code so that agreement can be reached on how to
- * map allocations.
- *
- * The page size of a buffer is this:
- *
- *   o  If the VM doesn't support large pages then obviously small pages
- *      must be used.
- *   o  If the base address is non-zero (fixed address map):
- *      - Attempt to find a reserved memory area and use the page size
- *        based on that.
- *      - If no reserved page size is available, default to small pages.
- *   o  If the base is zero:
- *      - If the size is larger than or equal to the big page size, use big
- *        pages.
- *      - Otherwise use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	if (!vm->big_pages)
-		return gmmu_page_size_small;
-
-	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
-		return __get_pte_size_split_addr(vm, base, size);
-
-	if (base)
-		return __get_pte_size_fixed_map(vm, base, size);
-
-	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-		return gmmu_page_size_big;
-	return gmmu_page_size_small;
-}
-
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 {
 	int err = 0;
@@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 	return __gk20a_vm_bind_channel(as_share->vm, ch);
 }
 
-int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
-	if (err) {
-		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		return err;
-	}
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
-void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (inst_block->size)
-		nvgpu_dma_free(g, inst_block);
-}
-
-u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (g->mm.has_physical_mode)
-		return nvgpu_mem_get_phys_addr(g, inst_block);
-	else
-		return nvgpu_mem_get_addr(g, inst_block);
-}
-
-static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
-	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	mm->bar1.vm = nvgpu_vm_init(g,
-				    big_page_size,
-				    SZ_4K,
-				    mm->bar1.aperture_size - SZ_4K,
-				    mm->bar1.aperture_size,
-				    true, false,
-				    "bar1");
-	if (!mm->bar1.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->bar1.vm);
-	return err;
-}
-
-/* pmu vm, share channel_vm interfaces */
-static int gk20a_init_system_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-	u32 low_hole, aperture_size;
-
-	/*
-	 * No user region - so we will pass that as zero sized.
-	 */
-	low_hole = SZ_4K * 16;
-	aperture_size = GK20A_PMU_VA_SIZE * 2;
-
-	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
-	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
-
-	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
-				   low_hole,
-				   aperture_size - low_hole,
-				   aperture_size,
-				   true,
-				   false,
-				   "system");
-	if (!mm->pmu.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->pmu.vm);
-	return err;
-}
-
-static int gk20a_init_hwpm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		return err;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
-
-	return 0;
-}
-
-static int gk20a_init_cde_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
-				   big_page_size << 10,
-				   NV_MM_DEFAULT_KERNEL_SIZE,
-				   NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				   false, false, "cde");
-	if (!mm->cde.vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static int gk20a_init_ce_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
-				  big_page_size << 10,
-				  NV_MM_DEFAULT_KERNEL_SIZE,
-				  NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				  false, false, "ce");
-	if (!mm->ce.vm)
-		return -ENOMEM;
-	return 0;
-}
-
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		       struct vm_gk20a *vm)
 {
@@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
-		gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
+		nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
 
 	g->ops.mm.init_pdb(g, inst_block, vm);
 
@@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
 }
 
+int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
+	if (err) {
+		nvgpu_err(g, "%s: memory allocation failed", __func__);
+		return err;
+	}
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
 int gk20a_mm_fb_flush(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -992,19 +600,6 @@ hw_was_off:
 	gk20a_idle_nosuspend(g);
 }
 
-int gk20a_mm_suspend(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-
-	nvgpu_vidmem_thread_pause_sync(&g->mm);
-
-	g->ops.mm.cbc_clean(g);
-	g->ops.mm.l2_flush(g, false);
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
 {
 	return 34;