From 2a285d0607a20694476399f5719e74dbc26fcd58 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Fri, 6 Oct 2017 11:30:29 -0700
Subject: gpu: nvgpu: Cleanup generic MM code in gk20a/mm_gk20a.c

Move much of the remaining generic MM code to a new common location:
common/mm/mm.c. Also add a corresponding header. This mostly consists
of init and cleanup code that handles the common MM data structures:
the VIDMEM code, the address spaces for the various engines, etc.

A few more in-depth changes were made as well:

  1. alloc_inst_block() has been added to the MM HAL. It used to be
     defined directly in the gk20a code, but it uses a register; if
     that register were ever to change, the function would have to
     become a HAL anyway. This patch preempts that and, for now,
     points all chips' HALs at the gk20a version.

  2. Rename as much as possible: global functions are, for the most
     part, prefixed with nvgpu (there are a few exceptions which I
     have yet to decide what to do with). Static functions are renamed
     to match their functionality as closely as possible, since in
     some cases function name and function effect have diverged.

JIRA NVGPU-30

Change-Id: Ic948f1ecc2f7976eba4bb7169a44b7226bb7c0b5
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master.nvidia.com/r/1574499
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 441 ++-----------------------------------
 1 file changed, 18 insertions(+), 423 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')

diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d96fa4e1..a17d6bb6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,6 +1,4 @@
 /*
- * GK20A memory management
- *
  * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,6 +22,7 @@
 #include
 
+#include
 #include
 #include
 #include
@@ -88,161 +87,6 @@
  *
  */
 
-static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
-
-static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-	if (g->ops.fb.reset)
-		g->ops.fb.reset(g);
-
-	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
-		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
-		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
-		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
-				g->blcg_enabled);
-	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
-		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
-				g->blcg_enabled);
-
-	if (g->ops.fb.init_fs_state)
-		g->ops.fb.init_fs_state(g);
-
-	return 0;
-}
-
-static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (mm->vidmem.ce_ctx_id != (u32)~0)
-		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	nvgpu_vm_put(mm->ce.vm);
-}
-
-static void gk20a_remove_mm_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (g->ops.mm.fault_info_mem_destroy)
-		g->ops.mm.fault_info_mem_destroy(g);
-
-	if (g->ops.mm.remove_bar2_vm)
-		g->ops.mm.remove_bar2_vm(g);
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		gk20a_free_inst_block(g, &mm->bar1.inst_block);
-		nvgpu_vm_put(mm->bar1.vm);
-	}
-
-	gk20a_free_inst_block(g, &mm->pmu.inst_block);
-	gk20a_free_inst_block(g, &mm->hwpm.inst_block);
-	nvgpu_vm_put(mm->pmu.vm);
-	nvgpu_vm_put(mm->cde.vm);
-
-	nvgpu_semaphore_sea_destroy(g);
-	nvgpu_vidmem_destroy(g);
-	nvgpu_pd_cache_fini(g);
-}
-
-static int gk20a_alloc_sysmem_flush(struct gk20a *g)
-{
-	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
-}
-
-int gk20a_init_mm_setup_sw(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (mm->sw_ready) {
-		gk20a_dbg_fn("skip init");
-		return 0;
-	}
-
-	mm->g = g;
-	nvgpu_mutex_init(&mm->l2_op_lock);
-
-	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
-		NV_MM_DEFAULT_KERNEL_SIZE;
-	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
-
-	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
-		       (int)(mm->channel.user_size >> 20),
-		       (int)(mm->channel.kernel_size >> 20));
-
-	nvgpu_init_pramin(mm);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	err = nvgpu_vidmem_init(mm);
-	if (err)
-		return err;
-
-	/*
-	 * this requires fixed allocations in vidmem which must be
-	 * allocated before all other buffers
-	 */
-	if (g->ops.pmu.alloc_blob_space
-			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
-		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
-		if (err)
-			return err;
-	}
-
-	err = gk20a_alloc_sysmem_flush(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		err = gk20a_init_bar1_vm(mm);
-		if (err)
-			return err;
-	}
-	if (g->ops.mm.init_bar2_vm) {
-		err = g->ops.mm.init_bar2_vm(g);
-		if (err)
-			return err;
-	}
-	err = gk20a_init_system_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_hwpm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_cde_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_ce_vm(mm);
-	if (err)
-		return err;
-
-	mm->remove_support = gk20a_remove_mm_support;
-	mm->remove_ce_support = gk20a_remove_mm_ce_support;
-
-	mm->sw_ready = true;
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 /* make sure gk20a_init_mm_support is called before */
 int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
@@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-int gk20a_init_mm_support(struct gk20a *g)
-{
-	u32 err;
-
-	err = gk20a_init_mm_reset_enable_hw(g);
-	if (err)
-		return err;
-
-	err = gk20a_init_mm_setup_sw(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.init_mm_setup_hw)
-		err = g->ops.mm.init_mm_setup_hw(g);
-
-	return err;
-}
-
-void gk20a_init_mm_ce_context(struct gk20a *g)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-	if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
-		g->mm.vidmem.ce_ctx_id =
-			gk20a_ce_create_context_with_cb(g,
-				gk20a_fifo_get_fast_ce_runlist_id(g),
-				-1,
-				-1,
-				-1,
-				NULL);
-
-		if (g->mm.vidmem.ce_ctx_id == (u32)~0)
-			nvgpu_err(g,
-				"Failed to allocate CE context for vidmem page clearing support");
-	}
-#endif
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.update_entry = NULL}
 };
 
-/*
- * Attempt to find a reserved memory area to determine PTE size for the passed
- * mapping. If no reserved area can be found use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-					      u64 base, u64 size)
-{
-	struct nvgpu_vm_area *vm_area;
-
-	vm_area = nvgpu_vm_area_find(vm, base);
-	if (!vm_area)
-		return gmmu_page_size_small;
-
-	return vm_area->pgsz_idx;
-}
-
-/*
- * This is for when the address space does not support unified address spaces.
- */
-static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
-						      u64 base, u64 size)
-{
-	if (!base) {
-		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-			return gmmu_page_size_big;
-		return gmmu_page_size_small;
-	} else {
-		if (base < __nv_gmmu_va_small_page_limit())
-			return gmmu_page_size_small;
-		else
-			return gmmu_page_size_big;
-	}
-}
-
-/*
- * This determines the PTE size for a given alloc. Used by both the GVA space
- * allocator and the mm core code so that agreement can be reached on how to
- * map allocations.
- *
- * The page size of a buffer is this:
- *
- *   o  If the VM doesn't support large pages then obviously small pages
- *      must be used.
- *   o  If the base address is non-zero (fixed address map):
- *      - Attempt to find a reserved memory area and use the page size
- *        based on that.
- *      - If no reserved page size is available, default to small pages.
- *   o  If the base is zero:
- *      - If the size is larger than or equal to the big page size, use big
- *        pages.
- *      - Otherwise use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	if (!vm->big_pages)
-		return gmmu_page_size_small;
-
-	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
-		return __get_pte_size_split_addr(vm, base, size);
-
-	if (base)
-		return __get_pte_size_fixed_map(vm, base, size);
-
-	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-		return gmmu_page_size_big;
-	return gmmu_page_size_small;
-}
-
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 {
 	int err = 0;
@@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 	return __gk20a_vm_bind_channel(as_share->vm, ch);
 }
 
-int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
-	if (err) {
-		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		return err;
-	}
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
-void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (inst_block->size)
-		nvgpu_dma_free(g, inst_block);
-}
-
-u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (g->mm.has_physical_mode)
-		return nvgpu_mem_get_phys_addr(g, inst_block);
-	else
-		return nvgpu_mem_get_addr(g, inst_block);
-}
-
-static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
-	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	mm->bar1.vm = nvgpu_vm_init(g,
-				    big_page_size,
-				    SZ_4K,
-				    mm->bar1.aperture_size - SZ_4K,
-				    mm->bar1.aperture_size,
-				    true, false,
-				    "bar1");
-	if (!mm->bar1.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->bar1.vm);
-	return err;
-}
-
-/* pmu vm, share channel_vm interfaces */
-static int gk20a_init_system_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-	u32 low_hole, aperture_size;
-
-	/*
-	 * No user region - so we will pass that as zero sized.
-	 */
-	low_hole = SZ_4K * 16;
-	aperture_size = GK20A_PMU_VA_SIZE * 2;
-
-	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
-	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
-
-	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
-				   low_hole,
-				   aperture_size - low_hole,
-				   aperture_size,
-				   true,
-				   false,
-				   "system");
-	if (!mm->pmu.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->pmu.vm);
-	return err;
-}
-
-static int gk20a_init_hwpm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		return err;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
-
-	return 0;
-}
-
-static int gk20a_init_cde_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
-			big_page_size << 10,
-			NV_MM_DEFAULT_KERNEL_SIZE,
-			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-			false, false, "cde");
-	if (!mm->cde.vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static int gk20a_init_ce_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
-			big_page_size << 10,
-			NV_MM_DEFAULT_KERNEL_SIZE,
-			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-			false, false, "ce");
-	if (!mm->ce.vm)
-		return -ENOMEM;
-	return 0;
-}
-
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		struct vm_gk20a *vm)
 {
@@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
-		gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
+		nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
 
 	g->ops.mm.init_pdb(g, inst_block, vm);
 
@@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 		g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
 }
 
+int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
+	if (err) {
+		nvgpu_err(g, "%s: memory allocation failed", __func__);
+		return err;
+	}
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
 int gk20a_mm_fb_flush(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -992,19 +600,6 @@ hw_was_off:
 	gk20a_idle_nosuspend(g);
 }
 
-int gk20a_mm_suspend(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-
-	nvgpu_vidmem_thread_pause_sync(&g->mm);
-
-	g->ops.mm.cbc_clean(g);
-	g->ops.mm.l2_flush(g, false);
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
 {
 	return 34;
-- 
cgit v1.2.2
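
To illustrate point 1 of the commit message, below is a minimal, self-contained
sketch of the ops-table pattern the HAL change relies on: common code calls
alloc_inst_block() only through a per-chip function pointer, and each chip's
HAL init currently points that hook at the gk20a implementation. All names here
(toy_gpu, toy_mm_ops, ram_in_alloc_size, and so on) are hypothetical stand-ins
for illustration, not the real nvgpu types or HAL wiring.

/*
 * Sketch only -- not part of the patch above. Models the "common code calls
 * through the HAL; the chip HAL init picks the implementation" pattern.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_mem {
	void *cpu_va;
	size_t size;
};

struct toy_gpu;

struct toy_mm_ops {
	/* Per-chip hook; gk20a provides the default implementation. */
	int (*alloc_inst_block)(struct toy_gpu *g, struct toy_mem *inst_block);
};

struct toy_gpu {
	struct toy_mm_ops mm;
};

/* Stand-in for the chip-specific instance block allocation size. */
static size_t ram_in_alloc_size(void)
{
	return 4096;
}

/* Mirrors the shape of gk20a_alloc_inst_block() in the patch above. */
static int gk20a_like_alloc_inst_block(struct toy_gpu *g,
				       struct toy_mem *inst_block)
{
	(void)g; /* unused in this sketch */

	inst_block->size = ram_in_alloc_size();
	inst_block->cpu_va = calloc(1, inst_block->size);
	return inst_block->cpu_va ? 0 : -1;
}

/* Every chip's HAL init currently points the hook at the gk20a version. */
static void toy_init_hal(struct toy_gpu *g)
{
	g->mm.alloc_inst_block = gk20a_like_alloc_inst_block;
}

int main(void)
{
	struct toy_gpu g = { { 0 } };
	struct toy_mem inst = { 0, 0 };

	toy_init_hal(&g);

	/* Common MM code only ever calls through the HAL pointer. */
	if (g.mm.alloc_inst_block(&g, &inst) != 0) {
		fprintf(stderr, "inst block alloc failed\n");
		return 1;
	}
	printf("allocated %zu byte inst block\n", inst.size);
	free(inst.cpu_va);
	return 0;
}

Routing the call through g->ops.mm rather than calling the gk20a function
directly means a chip whose instance-block sizing ever changes can override
just this one pointer without touching the common MM code.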