diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vidmem.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/vidmem.c | 259 |
1 file changed, 259 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c new file mode 100644 index 00000000..1ba07ca6 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/vidmem.c | |||
@@ -0,0 +1,259 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <linux/scatterlist.h> | ||
24 | |||
25 | #include <nvgpu/vidmem.h> | ||
26 | #include <nvgpu/page_allocator.h> | ||
27 | |||
28 | #include "gk20a/gk20a.h" | ||
29 | #include "gk20a/mm_gk20a.h" | ||
30 | |||
31 | void gk20a_vidmem_destroy(struct gk20a *g) | ||
32 | { | ||
33 | if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) | ||
34 | nvgpu_alloc_destroy(&g->mm.vidmem.allocator); | ||
35 | } | ||
36 | |||
/*
 * Zero all of vidmem except the bootstrap/WPR carveout, using the copy
 * engine (CE) memset operation.  Vidmem is cleared in two pieces: region 1
 * runs from the start of vidmem up to the bootstrap base, region 2 from the
 * end of the bootstrap region onward.  Only the last CE operation is given
 * a fence; waiting on it guarantees both memsets have completed since CE
 * ops on one context execute in order.
 *
 * Returns 0 on success, -EINVAL if no CE context exists yet, or the error
 * from the CE submission / fence wait.
 */
int gk20a_vidmem_clear_all(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct gk20a_fence *gk20a_fence_out = NULL;
	u64 region2_base = 0;
	int err = 0;

	/* ~0 means no CE context was allocated - cannot clear vidmem yet. */
	if (mm->vidmem.ce_ctx_id == (u32)~0)
		return -EINVAL;

	/* Region 1: [vidmem.base, bootstrap_base). No fence requested. */
	err = gk20a_ce_execute_ops(g,
			mm->vidmem.ce_ctx_id,
			0,
			mm->vidmem.base,
			mm->vidmem.bootstrap_base - mm->vidmem.base,
			0x00000000,
			NVGPU_CE_DST_LOCATION_LOCAL_FB,
			NVGPU_CE_MEMSET,
			NULL,
			0,
			NULL);
	if (err) {
		nvgpu_err(g,
			"Failed to clear vidmem region 1 : %d", err);
		return err;
	}

	/* Region 2 starts right after the bootstrap carveout. */
	region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size;

	/*
	 * NOTE(review): gk20a_init_vidmem stores "size - base" into
	 * mm->vidmem.size, so "mm->vidmem.size - region2_base" looks like it
	 * may fall short of the true end of vidmem by mm->vidmem.base bytes.
	 * Confirm the intended extent of region 2.
	 */
	err = gk20a_ce_execute_ops(g,
			mm->vidmem.ce_ctx_id,
			0,
			region2_base,
			mm->vidmem.size - region2_base,
			0x00000000,
			NVGPU_CE_DST_LOCATION_LOCAL_FB,
			NVGPU_CE_MEMSET,
			NULL,
			0,
			&gk20a_fence_out);
	if (err) {
		nvgpu_err(g,
			"Failed to clear vidmem region 2 : %d", err);
		return err;
	}

	if (gk20a_fence_out) {
		struct nvgpu_timeout timeout;

		nvgpu_timeout_init(g, &timeout,
				   gk20a_get_gr_idle_timeout(g),
				   NVGPU_TIMER_CPU_TIMER);

		/*
		 * Retry the wait while it is merely interrupted
		 * (-ERESTARTSYS), but give up once the CPU-side timeout
		 * expires so an unsignalled fence cannot hang us forever.
		 */
		do {
			err = gk20a_fence_wait(g, gk20a_fence_out,
					       gk20a_get_gr_idle_timeout(g));
		} while (err == -ERESTARTSYS &&
			 !nvgpu_timeout_expired(&timeout));

		gk20a_fence_put(gk20a_fence_out);
		if (err) {
			nvgpu_err(g,
				"fence wait failed for CE execute ops");
			return err;
		}
	}

	/* Remember that the one-time full clear has been done. */
	mm->vidmem.cleared = true;

	return 0;
}
108 | |||
109 | int gk20a_init_vidmem(struct mm_gk20a *mm) | ||
110 | { | ||
111 | struct gk20a *g = mm->g; | ||
112 | size_t size = g->ops.mm.get_vidmem_size ? | ||
113 | g->ops.mm.get_vidmem_size(g) : 0; | ||
114 | u64 bootstrap_base, bootstrap_size, base; | ||
115 | u64 default_page_size = SZ_64K; | ||
116 | int err; | ||
117 | |||
118 | static struct nvgpu_alloc_carveout wpr_co = | ||
119 | NVGPU_CARVEOUT("wpr-region", 0, SZ_16M); | ||
120 | |||
121 | if (!size) | ||
122 | return 0; | ||
123 | |||
124 | wpr_co.base = size - SZ_256M; | ||
125 | bootstrap_base = wpr_co.base; | ||
126 | bootstrap_size = SZ_16M; | ||
127 | base = default_page_size; | ||
128 | |||
129 | /* | ||
130 | * Bootstrap allocator for use before the CE is initialized (CE | ||
131 | * initialization requires vidmem but we want to use the CE to zero | ||
132 | * out vidmem before allocating it... | ||
133 | */ | ||
134 | err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator, | ||
135 | "vidmem-bootstrap", | ||
136 | bootstrap_base, bootstrap_size, | ||
137 | SZ_4K, 0); | ||
138 | |||
139 | err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator, | ||
140 | "vidmem", | ||
141 | base, size - base, | ||
142 | default_page_size, | ||
143 | GPU_ALLOC_4K_VIDMEM_PAGES); | ||
144 | if (err) { | ||
145 | nvgpu_err(g, "Failed to register vidmem for size %zu: %d", | ||
146 | size, err); | ||
147 | return err; | ||
148 | } | ||
149 | |||
150 | /* Reserve bootstrap region in vidmem allocator */ | ||
151 | nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co); | ||
152 | |||
153 | mm->vidmem.base = base; | ||
154 | mm->vidmem.size = size - base; | ||
155 | mm->vidmem.bootstrap_base = bootstrap_base; | ||
156 | mm->vidmem.bootstrap_size = bootstrap_size; | ||
157 | |||
158 | nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); | ||
159 | |||
160 | INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker); | ||
161 | nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); | ||
162 | nvgpu_init_list_node(&mm->vidmem.clear_list_head); | ||
163 | nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); | ||
164 | |||
165 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); | ||
166 | |||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | int gk20a_vidmem_get_space(struct gk20a *g, u64 *space) | ||
171 | { | ||
172 | struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator; | ||
173 | |||
174 | gk20a_dbg_fn(""); | ||
175 | |||
176 | if (!nvgpu_alloc_initialized(allocator)) | ||
177 | return -ENOSYS; | ||
178 | |||
179 | nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); | ||
180 | *space = nvgpu_alloc_space(allocator) + | ||
181 | nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending); | ||
182 | nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | ||
187 | { | ||
188 | struct gk20a_fence *gk20a_fence_out = NULL; | ||
189 | struct gk20a_fence *gk20a_last_fence = NULL; | ||
190 | struct nvgpu_page_alloc *alloc = NULL; | ||
191 | void *sgl = NULL; | ||
192 | int err = 0; | ||
193 | |||
194 | if (g->mm.vidmem.ce_ctx_id == (u32)~0) | ||
195 | return -EINVAL; | ||
196 | |||
197 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); | ||
198 | |||
199 | nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) { | ||
200 | if (gk20a_last_fence) | ||
201 | gk20a_fence_put(gk20a_last_fence); | ||
202 | |||
203 | err = gk20a_ce_execute_ops(g, | ||
204 | g->mm.vidmem.ce_ctx_id, | ||
205 | 0, | ||
206 | nvgpu_sgt_get_phys(&alloc->sgt, sgl), | ||
207 | nvgpu_sgt_get_length(&alloc->sgt, sgl), | ||
208 | 0x00000000, | ||
209 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | ||
210 | NVGPU_CE_MEMSET, | ||
211 | NULL, | ||
212 | 0, | ||
213 | &gk20a_fence_out); | ||
214 | |||
215 | if (err) { | ||
216 | nvgpu_err(g, | ||
217 | "Failed gk20a_ce_execute_ops[%d]", err); | ||
218 | return err; | ||
219 | } | ||
220 | |||
221 | gk20a_last_fence = gk20a_fence_out; | ||
222 | } | ||
223 | |||
224 | if (gk20a_last_fence) { | ||
225 | struct nvgpu_timeout timeout; | ||
226 | |||
227 | nvgpu_timeout_init(g, &timeout, | ||
228 | gk20a_get_gr_idle_timeout(g), | ||
229 | NVGPU_TIMER_CPU_TIMER); | ||
230 | |||
231 | do { | ||
232 | err = gk20a_fence_wait(g, gk20a_last_fence, | ||
233 | gk20a_get_gr_idle_timeout(g)); | ||
234 | } while (err == -ERESTARTSYS && | ||
235 | !nvgpu_timeout_expired(&timeout)); | ||
236 | |||
237 | gk20a_fence_put(gk20a_last_fence); | ||
238 | if (err) | ||
239 | nvgpu_err(g, | ||
240 | "fence wait failed for CE execute ops"); | ||
241 | } | ||
242 | |||
243 | return err; | ||
244 | } | ||
245 | |||
246 | struct nvgpu_mem *get_pending_mem_desc(struct mm_gk20a *mm) | ||
247 | { | ||
248 | struct nvgpu_mem *mem = NULL; | ||
249 | |||
250 | nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); | ||
251 | if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { | ||
252 | mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, | ||
253 | nvgpu_mem, clear_list_entry); | ||
254 | nvgpu_list_del(&mem->clear_list_entry); | ||
255 | } | ||
256 | nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); | ||
257 | |||
258 | return mem; | ||
259 | } | ||