author		Alex Waterman <alexw@nvidia.com>	2017-10-06 14:30:29 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-10-24 18:16:49 -0400
commit		2a285d0607a20694476399f5719e74dbc26fcd58 (patch)
tree		ef0246e3ca7b933ce3ea4c74061f61cc2e394b8b /drivers/gpu/nvgpu/common/mm
parent		748331cbab1c7af26ab1fbae5ead2cdaff22806a (diff)
gpu: nvgpu: Cleanup generic MM code in gk20a/mm_gk20a.c
Move much of the remaining generic MM code to a new common location: common/mm/mm.c. Also add a corresponding <nvgpu/mm.h> header. This mostly consists of init and cleanup code that handles the common MM data structures: the VIDMEM code, the address spaces for the various engines, etc.

A few more in-depth changes were made as well:

1. alloc_inst_block() has been added to the MM HAL. This used to be defined directly in the gk20a code, but it used a register; if that register were ever to change on a future chip, the function would need to become a HAL anyway. This change preempts that, and for now all chips simply point the HAL at the gk20a version.

2. Rename as much as possible: global functions are, for the most part, prefixed with nvgpu_ (there are a few exceptions which I have yet to decide what to do with). Static functions are renamed to match their functionality as closely as possible, since in some cases function behavior and function name had diverged.

JIRA NVGPU-30

Change-Id: Ic948f1ecc2f7976eba4bb7169a44b7226bb7c0b5
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1574499
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
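For illustration, a minimal sketch of the indirection point added in (1): a chip whose instance-block sizing register diverges from gk20a could plug its own allocator into the new HAL. The gxxx_* names and the size helper below are hypothetical; only g->ops.mm.alloc_inst_block, nvgpu_dma_alloc() and struct nvgpu_mem come from the existing code.

#include <nvgpu/dma.h>

#include "gk20a/gk20a.h"

/* Hypothetical chip-specific override of the alloc_inst_block HAL. */
static int gxxx_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	/* gxxx_ram_in_alloc_size() stands in for the chip's register-derived size. */
	return nvgpu_dma_alloc(g, gxxx_ram_in_alloc_size(), inst_block);
}

/*
 * Wired up in that chip's HAL init; today every chip points this at the
 * gk20a implementation instead:
 *
 *	g->ops.mm.alloc_inst_block = gxxx_alloc_inst_block;
 */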
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r--	drivers/gpu/nvgpu/common/mm/buddy_allocator.c	  4
-rw-r--r--	drivers/gpu/nvgpu/common/mm/mm.c		426
2 files changed, 428 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index c6f10a69..a2546e9d 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -25,8 +25,8 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/log2.h>
 #include <nvgpu/barrier.h>
-
-#include "gk20a/mm_gk20a.h"
+#include <nvgpu/mm.h>
+#include <nvgpu/vm.h>
 
 #include "buddy_allocator_priv.h"
 
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
new file mode 100644
index 00000000..1027ed28
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -0,0 +1,426 @@
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include <nvgpu/dma.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/pramin.h>
#include <nvgpu/enabled.h>

#include "gk20a/gk20a.h"

/*
 * Attempt to find a reserved memory area to determine PTE size for the passed
 * mapping. If no reserved area can be found use small pages.
 */
enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
					      u64 base, u64 size)
{
	struct nvgpu_vm_area *vm_area;

	vm_area = nvgpu_vm_area_find(vm, base);
	if (!vm_area)
		return gmmu_page_size_small;

	return vm_area->pgsz_idx;
}

/*
 * This is for when the address space does not support unified address spaces.
 */
static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
						      u64 base, u64 size)
{
	if (!base) {
		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
			return gmmu_page_size_big;
		return gmmu_page_size_small;
	} else {
		if (base < __nv_gmmu_va_small_page_limit())
			return gmmu_page_size_small;
		else
			return gmmu_page_size_big;
	}
}

/*
 * This determines the PTE size for a given alloc. Used by both the GVA space
 * allocator and the mm core code so that agreement can be reached on how to
 * map allocations.
 *
 * The page size of a buffer is this:
 *
 *   o  If the VM doesn't support large pages then obviously small pages
 *      must be used.
 *   o  If the base address is non-zero (fixed address map):
 *      -  Attempt to find a reserved memory area and use the page size
 *         based on that.
 *      -  If no reserved page size is available, default to small pages.
 *   o  If the base is zero:
 *      -  If the size is larger than or equal to the big page size, use big
 *         pages.
 *      -  Otherwise use small pages.
 */
enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
{
	struct gk20a *g = gk20a_from_vm(vm);

	if (!vm->big_pages)
		return gmmu_page_size_small;

	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
		return __get_pte_size_split_addr(vm, base, size);

	if (base)
		return __get_pte_size_fixed_map(vm, base, size);

	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
		return gmmu_page_size_big;
	return gmmu_page_size_small;
}

int nvgpu_mm_suspend(struct gk20a *g)
{
	nvgpu_info(g, "MM suspend running...");

	nvgpu_vidmem_thread_pause_sync(&g->mm);

	g->ops.mm.cbc_clean(g);
	g->ops.mm.l2_flush(g, false);

	nvgpu_info(g, "MM suspend done!");

	return 0;
}

u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	if (g->mm.has_physical_mode)
		return nvgpu_mem_get_phys_addr(g, inst_block);
	else
		return nvgpu_mem_get_addr(g, inst_block);
}

void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	if (nvgpu_mem_is_valid(inst_block))
		nvgpu_dma_free(g, inst_block);
}

static int nvgpu_alloc_sysmem_flush(struct gk20a *g)
{
	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
}

static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm)
{
	struct gk20a *g = gk20a_from_mm(mm);

	if (mm->vidmem.ce_ctx_id != (u32)~0)
		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);

	mm->vidmem.ce_ctx_id = (u32)~0;

	nvgpu_vm_put(mm->ce.vm);
}

static void nvgpu_remove_mm_support(struct mm_gk20a *mm)
{
	struct gk20a *g = gk20a_from_mm(mm);

	if (g->ops.mm.fault_info_mem_destroy)
		g->ops.mm.fault_info_mem_destroy(g);

	if (g->ops.mm.remove_bar2_vm)
		g->ops.mm.remove_bar2_vm(g);

	if (g->ops.mm.is_bar1_supported(g)) {
		nvgpu_free_inst_block(g, &mm->bar1.inst_block);
		nvgpu_vm_put(mm->bar1.vm);
	}

	nvgpu_free_inst_block(g, &mm->pmu.inst_block);
	nvgpu_free_inst_block(g, &mm->hwpm.inst_block);
	nvgpu_vm_put(mm->pmu.vm);
	nvgpu_vm_put(mm->cde.vm);

	nvgpu_semaphore_sea_destroy(g);
	nvgpu_vidmem_destroy(g);
	nvgpu_pd_cache_fini(g);
}

/* pmu vm, share channel_vm interfaces */
static int nvgpu_init_system_vm(struct mm_gk20a *mm)
{
	int err;
	struct gk20a *g = gk20a_from_mm(mm);
	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
	u32 big_page_size = g->ops.mm.get_default_big_page_size();
	u32 low_hole, aperture_size;

	/*
	 * No user region - so we will pass that as zero sized.
	 */
	low_hole = SZ_4K * 16;
	aperture_size = GK20A_PMU_VA_SIZE * 2;

	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
	nvgpu_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size);

	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
				   low_hole,
				   aperture_size - low_hole,
				   aperture_size,
				   true,
				   false,
				   "system");
	if (!mm->pmu.vm)
		return -ENOMEM;

	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		goto clean_up_vm;
	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);

	return 0;

clean_up_vm:
	nvgpu_vm_put(mm->pmu.vm);
	return err;
}

static int nvgpu_init_hwpm(struct mm_gk20a *mm)
{
	int err;
	struct gk20a *g = gk20a_from_mm(mm);
	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;

	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		return err;
	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);

	return 0;
}

static int nvgpu_init_cde_vm(struct mm_gk20a *mm)
{
	struct gk20a *g = gk20a_from_mm(mm);
	u32 big_page_size = g->ops.mm.get_default_big_page_size();

	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
				   big_page_size << 10,
				   NV_MM_DEFAULT_KERNEL_SIZE,
				   NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
				   false, false, "cde");
	if (!mm->cde.vm)
		return -ENOMEM;
	return 0;
}

static int nvgpu_init_ce_vm(struct mm_gk20a *mm)
{
	struct gk20a *g = gk20a_from_mm(mm);
	u32 big_page_size = g->ops.mm.get_default_big_page_size();

	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
				  big_page_size << 10,
				  NV_MM_DEFAULT_KERNEL_SIZE,
				  NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
				  false, false, "ce");
	if (!mm->ce.vm)
		return -ENOMEM;
	return 0;
}

void nvgpu_init_mm_ce_context(struct gk20a *g)
{
#if defined(CONFIG_GK20A_VIDMEM)
	if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
		g->mm.vidmem.ce_ctx_id =
			gk20a_ce_create_context_with_cb(g,
				gk20a_fifo_get_fast_ce_runlist_id(g),
				-1,
				-1,
				-1,
				NULL);

		if (g->mm.vidmem.ce_ctx_id == (u32)~0)
			nvgpu_err(g,
				"Failed to allocate CE context for vidmem page clearing support");
	}
#endif
}

static int nvgpu_init_mm_reset_enable_hw(struct gk20a *g)
{
	if (g->ops.fb.reset)
		g->ops.fb.reset(g);

	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
				g->slcg_enabled);
	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
				g->slcg_enabled);
	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
				g->blcg_enabled);
	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
				g->blcg_enabled);

	if (g->ops.fb.init_fs_state)
		g->ops.fb.init_fs_state(g);

	return 0;
}

static int nvgpu_init_bar1_vm(struct mm_gk20a *mm)
{
	int err;
	struct gk20a *g = gk20a_from_mm(mm);
	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
	u32 big_page_size = g->ops.mm.get_default_big_page_size();

	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
	nvgpu_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size);
	mm->bar1.vm = nvgpu_vm_init(g,
				    big_page_size,
				    SZ_4K,
				    mm->bar1.aperture_size - SZ_4K,
				    mm->bar1.aperture_size,
				    true, false,
				    "bar1");
	if (!mm->bar1.vm)
		return -ENOMEM;

	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		goto clean_up_vm;
	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);

	return 0;

clean_up_vm:
	nvgpu_vm_put(mm->bar1.vm);
	return err;
}

static int nvgpu_init_mm_setup_sw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	int err;

	if (mm->sw_ready) {
		nvgpu_info(g, "skip init");
		return 0;
	}

	mm->g = g;
	nvgpu_mutex_init(&mm->l2_op_lock);

	/*TBD: make channel vm size configurable */
	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
		NV_MM_DEFAULT_KERNEL_SIZE;
	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;

	nvgpu_info(g, "channel vm size: user %dMB kernel %dMB",
		   (int)(mm->channel.user_size >> 20),
		   (int)(mm->channel.kernel_size >> 20));

	nvgpu_init_pramin(mm);

	mm->vidmem.ce_ctx_id = (u32)~0;

	err = nvgpu_vidmem_init(mm);
	if (err)
		return err;

	/*
	 * this requires fixed allocations in vidmem which must be
	 * allocated before all other buffers
	 */
	if (g->ops.pmu.alloc_blob_space
			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
		if (err)
			return err;
	}

	err = nvgpu_alloc_sysmem_flush(g);
	if (err)
		return err;

	if (g->ops.mm.is_bar1_supported(g)) {
		err = nvgpu_init_bar1_vm(mm);
		if (err)
			return err;
	}
	if (g->ops.mm.init_bar2_vm) {
		err = g->ops.mm.init_bar2_vm(g);
		if (err)
			return err;
	}
	err = nvgpu_init_system_vm(mm);
	if (err)
		return err;

	err = nvgpu_init_hwpm(mm);
	if (err)
		return err;

	err = nvgpu_init_cde_vm(mm);
	if (err)
		return err;

	err = nvgpu_init_ce_vm(mm);
	if (err)
		return err;

	mm->remove_support = nvgpu_remove_mm_support;
	mm->remove_ce_support = nvgpu_remove_mm_ce_support;

	mm->sw_ready = true;

	return 0;
}

int nvgpu_init_mm_support(struct gk20a *g)
{
	u32 err;

	err = nvgpu_init_mm_reset_enable_hw(g);
	if (err)
		return err;

	err = nvgpu_init_mm_setup_sw(g);
	if (err)
		return err;

	if (g->ops.mm.init_mm_setup_hw)
		err = g->ops.mm.init_mm_setup_hw(g);

	return err;
}
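As a usage note, the entry points collected in this file are expected to be driven from the OS/platform layer roughly as sketched below. The example_* wrappers are hypothetical, and the sketch assumes nvgpu_init_mm_support(), nvgpu_init_mm_ce_context() and nvgpu_mm_suspend() are exported via the new <nvgpu/mm.h>; nothing else is taken from this change.

#include <nvgpu/mm.h>

#include "gk20a/gk20a.h"

/* Hypothetical power-on path. */
static int example_mm_poweron(struct gk20a *g)
{
	/* Resets FB, loads the gating prods, builds the common SW state,
	 * then runs the per-chip HW setup HAL. */
	int err = nvgpu_init_mm_support(g);

	if (err)
		return err;

	/* Once CE itself is up, create the context used for vidmem clears. */
	nvgpu_init_mm_ce_context(g);

	return 0;
}

/* Hypothetical suspend path: pause vidmem clears, clean CBC and flush L2. */
static int example_mm_suspend(struct gk20a *g)
{
	return nvgpu_mm_suspend(g);
}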