From 3c37701377459fbea2b460e1b9c65a863dfb04b2 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Mon, 31 Jul 2017 12:32:07 -0700
Subject: gpu: nvgpu: Split VIDMEM support from mm_gk20a.c

Split VIDMEM support into its own code files organized as such:

  common/mm/vidmem.c     - Base vidmem support
  common/linux/vidmem.c  - Linux specific user-space interaction
  include/nvgpu/vidmem.h - Vidmem API definitions

Also use the config to enable/disable VIDMEM support in the makefile and
remove as many CONFIG_GK20A_VIDMEM preprocessor checks as possible from
the source code.

And lastly update a while-loop that iterated over an SGT to use the new
for_each construct for iterating over SGTs (a minimal sketch of this idiom
is appended after the patch).

Currently this organization is not perfectly adhered to. More patches
will fix that.

JIRA NVGPU-30
JIRA NVGPU-138

Change-Id: Ic0f4d2cf38b65849c7dc350a69b175421477069c
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master.nvidia.com/r/1540705
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/mm/gmmu.c      |   1 +
 drivers/gpu/nvgpu/common/mm/nvgpu_mem.c |   1 +
 drivers/gpu/nvgpu/common/mm/vidmem.c    | 259 ++++++++++++++++++++++++++++++++
 3 files changed, 261 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/common/mm/vidmem.c

(limited to 'drivers/gpu/nvgpu/common/mm')

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index f61ec3fc..1eed3a3b 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <nvgpu/vidmem.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"

diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index faee482d..2b6e6e6a 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include <nvgpu/vidmem.h>
 
 #include "gk20a/gk20a.h"

diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
new file mode 100644
index 00000000..1ba07ca6
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+void gk20a_vidmem_destroy(struct gk20a *g)
+{
+	if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
+		nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
+}
+
+int gk20a_vidmem_clear_all(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct gk20a_fence *gk20a_fence_out = NULL;
+	u64 region2_base = 0;
+	int err = 0;
+
+	if (mm->vidmem.ce_ctx_id == (u32)~0)
+		return -EINVAL;
+
+	err = gk20a_ce_execute_ops(g,
+			mm->vidmem.ce_ctx_id,
+			0,
+			mm->vidmem.base,
+			mm->vidmem.bootstrap_base - mm->vidmem.base,
+			0x00000000,
+			NVGPU_CE_DST_LOCATION_LOCAL_FB,
+			NVGPU_CE_MEMSET,
+			NULL,
+			0,
+			NULL);
+	if (err) {
+		nvgpu_err(g,
+			"Failed to clear vidmem region 1 : %d", err);
+		return err;
+	}
+
+	region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size;
+
+	err = gk20a_ce_execute_ops(g,
+			mm->vidmem.ce_ctx_id,
+			0,
+			region2_base,
+			mm->vidmem.size - region2_base,
+			0x00000000,
+			NVGPU_CE_DST_LOCATION_LOCAL_FB,
+			NVGPU_CE_MEMSET,
+			NULL,
+			0,
+			&gk20a_fence_out);
+	if (err) {
+		nvgpu_err(g,
+			"Failed to clear vidmem region 2 : %d", err);
+		return err;
+	}
+
+	if (gk20a_fence_out) {
+		struct nvgpu_timeout timeout;
+
+		nvgpu_timeout_init(g, &timeout,
+				   gk20a_get_gr_idle_timeout(g),
+				   NVGPU_TIMER_CPU_TIMER);
+
+		do {
+			err = gk20a_fence_wait(g, gk20a_fence_out,
+					       gk20a_get_gr_idle_timeout(g));
+		} while (err == -ERESTARTSYS &&
+			 !nvgpu_timeout_expired(&timeout));
+
+		gk20a_fence_put(gk20a_fence_out);
+		if (err) {
+			nvgpu_err(g,
+				"fence wait failed for CE execute ops");
+			return err;
+		}
+	}
+
+	mm->vidmem.cleared = true;
+
+	return 0;
+}
+
+int gk20a_init_vidmem(struct mm_gk20a *mm)
+{
+	struct gk20a *g = mm->g;
+	size_t size = g->ops.mm.get_vidmem_size ?
+		g->ops.mm.get_vidmem_size(g) : 0;
+	u64 bootstrap_base, bootstrap_size, base;
+	u64 default_page_size = SZ_64K;
+	int err;
+
+	static struct nvgpu_alloc_carveout wpr_co =
+		NVGPU_CARVEOUT("wpr-region", 0, SZ_16M);
+
+	if (!size)
+		return 0;
+
+	wpr_co.base = size - SZ_256M;
+	bootstrap_base = wpr_co.base;
+	bootstrap_size = SZ_16M;
+	base = default_page_size;
+
+	/*
+	 * Bootstrap allocator for use before the CE is initialized (CE
+	 * initialization requires vidmem but we want to use the CE to zero
+	 * out vidmem before allocating it...
+	 */
+	err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator,
+					"vidmem-bootstrap",
+					bootstrap_base, bootstrap_size,
+					SZ_4K, 0);
+
+	err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator,
+					"vidmem",
+					base, size - base,
+					default_page_size,
+					GPU_ALLOC_4K_VIDMEM_PAGES);
+	if (err) {
+		nvgpu_err(g, "Failed to register vidmem for size %zu: %d",
+				size, err);
+		return err;
+	}
+
+	/* Reserve bootstrap region in vidmem allocator */
+	nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co);
+
+	mm->vidmem.base = base;
+	mm->vidmem.size = size - base;
+	mm->vidmem.bootstrap_base = bootstrap_base;
+	mm->vidmem.bootstrap_size = bootstrap_size;
+
+	nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
+
+	INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker);
+	nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
+	nvgpu_init_list_node(&mm->vidmem.clear_list_head);
+	nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
+
+	gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
+
+	return 0;
+}
+
+int gk20a_vidmem_get_space(struct gk20a *g, u64 *space)
+{
+	struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator;
+
+	gk20a_dbg_fn("");
+
+	if (!nvgpu_alloc_initialized(allocator))
+		return -ENOSYS;
+
+	nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
+	*space = nvgpu_alloc_space(allocator) +
+		nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending);
+	nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
+	return 0;
+}
+
+int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	struct gk20a_fence *gk20a_fence_out = NULL;
+	struct gk20a_fence *gk20a_last_fence = NULL;
+	struct nvgpu_page_alloc *alloc = NULL;
+	void *sgl = NULL;
+	int err = 0;
+
+	if (g->mm.vidmem.ce_ctx_id == (u32)~0)
+		return -EINVAL;
+
+	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
+
+	nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) {
+		if (gk20a_last_fence)
+			gk20a_fence_put(gk20a_last_fence);
+
+		err = gk20a_ce_execute_ops(g,
+			g->mm.vidmem.ce_ctx_id,
+			0,
+			nvgpu_sgt_get_phys(&alloc->sgt, sgl),
+			nvgpu_sgt_get_length(&alloc->sgt, sgl),
+			0x00000000,
+			NVGPU_CE_DST_LOCATION_LOCAL_FB,
+			NVGPU_CE_MEMSET,
+			NULL,
+			0,
+			&gk20a_fence_out);
+
+		if (err) {
+			nvgpu_err(g,
+				"Failed gk20a_ce_execute_ops[%d]", err);
+			return err;
+		}
+
+		gk20a_last_fence = gk20a_fence_out;
+	}
+
+	if (gk20a_last_fence) {
+		struct nvgpu_timeout timeout;
+
+		nvgpu_timeout_init(g, &timeout,
+				   gk20a_get_gr_idle_timeout(g),
+				   NVGPU_TIMER_CPU_TIMER);
+
+		do {
+			err = gk20a_fence_wait(g, gk20a_last_fence,
+					       gk20a_get_gr_idle_timeout(g));
+		} while (err == -ERESTARTSYS &&
+			 !nvgpu_timeout_expired(&timeout));
+
+		gk20a_fence_put(gk20a_last_fence);
+		if (err)
+			nvgpu_err(g,
+				"fence wait failed for CE execute ops");
+	}
+
+	return err;
+}
+
+struct nvgpu_mem *get_pending_mem_desc(struct mm_gk20a *mm)
+{
+	struct nvgpu_mem *mem = NULL;
+
+	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
+	if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
+		mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
+				nvgpu_mem, clear_list_entry);
+		nvgpu_list_del(&mem->clear_list_entry);
+	}
+	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
+
+	return mem;
+}
--
cgit v1.2.2
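
Sketch of the while-loop to for_each conversion mentioned in the commit message:
the converted loop is visible above in gk20a_gmmu_clear_vidmem_mem(), which now
uses nvgpu_sgt_for_each_sgl(). The snippet below is a minimal, self-contained
illustration of that idiom only; the toy_sgl type and toy_for_each_sgl macro are
made up for this example and are not the real nvgpu SGT types or macro (in the
driver the real macro walks the list through the nvgpu_sgt abstraction rather
than a plain next pointer).

  #include <stdio.h>

  struct toy_sgl {
  	unsigned long phys;
  	unsigned long length;
  	struct toy_sgl *next;
  };

  /* The macro hides the cursor advance that the old while-loop did by hand. */
  #define toy_for_each_sgl(sgl, head) \
  	for ((sgl) = (head); (sgl) != NULL; (sgl) = (sgl)->next)

  int main(void)
  {
  	struct toy_sgl c = { 0x3000, 0x1000, NULL };
  	struct toy_sgl b = { 0x2000, 0x2000, &c };
  	struct toy_sgl a = { 0x1000, 0x1000, &b };
  	struct toy_sgl *sgl;

  	/* Old style: explicit while-loop with a manual advance step. */
  	sgl = &a;
  	while (sgl) {
  		printf("while:    phys=0x%lx len=0x%lx\n", sgl->phys, sgl->length);
  		sgl = sgl->next;
  	}

  	/* New style: the traversal boilerplate lives in the macro, so the loop
  	 * body only deals with the current entry and cannot forget the advance. */
  	toy_for_each_sgl(sgl, &a)
  		printf("for_each: phys=0x%lx len=0x%lx\n", sgl->phys, sgl->length);

  	return 0;
  }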