1 files changed, 138 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index a2ed3f3a..695347bc 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -15,12 +15,150 @@
 */
 #include <nvgpu/log.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/gmmu.h>
 #include <nvgpu/nvgpu_mem.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
+/*
+ * Allocate 2^order contiguous pages with alloc_pages() and describe them in
+ * @entry->mem. Called from nvgpu_alloc_gmmu_pages() when g->is_fmodel is set.
+ *
+ * @vm:    VM the page table belongs to; only used to reach the gk20a instance.
+ * @order: log2 of the number of pages to allocate.
+ * @entry: page table entry whose mem descriptor is filled in.
+ *
+ * On success the pages are zeroed, wrapped in a single-entry sg table, tagged
+ * APERTURE_SYSMEM, handed to FLUSH_CPU_DCACHE(), and 0 is returned.
+ *
+ * On failure everything allocated here is released again and -ENOMEM is
+ * returned. @entry->mem.priv.sgt is never left pointing at freed memory.
+ */
+static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
+                                 struct gk20a_mm_entry *entry)
+{
+        u32 num_pages = 1 << order;
+        u32 len = num_pages * PAGE_SIZE;
+        int err;
+        struct page *pages;
+        struct gk20a *g = vm->mm->g;
+        /* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */
+        pages = alloc_pages(GFP_KERNEL, order);
+        if (!pages) {
+                nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed");
+                goto err_out;
+        }
+        entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
+        if (!entry->mem.priv.sgt) {
+                nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table");
+                goto err_alloced;
+        }
+        err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
+        if (err) {
+                nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed");
+                goto err_sg_table;
+        }
+        /* One scatterlist entry covers the whole contiguous allocation. */
+        sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
+        entry->mem.cpu_va = page_address(pages);
+        memset(entry->mem.cpu_va, 0, len);
+        entry->mem.size = len;
+        entry->mem.aperture = APERTURE_SYSMEM;
+        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
+                         sg_phys(entry->mem.priv.sgt->sgl), len);
+        return 0;
+err_sg_table:
+        nvgpu_kfree(g, entry->mem.priv.sgt);
+        /*
+         * Clear the pointer: the caller inspects entry->mem.priv.sgt after
+         * we return and must not see the table that was just freed.
+         */
+        entry->mem.priv.sgt = NULL;
+err_alloced:
+        __free_pages(pages, order);
+err_out:
+        return -ENOMEM;
+}
+/*
+ * Obtain backing memory for a GMMU page table of 2^order pages and record it
+ * in @entry->mem.
+ *
+ * When g->is_fmodel is set the request is forwarded to
+ * alloc_gmmu_phys_pages() and its result is returned unchanged. Otherwise the
+ * memory comes from the nvgpu DMA allocator.
+ *
+ * Returns 0 on success, -ENOMEM if the DMA allocation fails.
+ */
+static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
+                                  struct gk20a_mm_entry *entry)
+{
+        struct gk20a *g = gk20a_from_vm(vm);
+        u32 nr_pages = 1 << order;
+        u32 size = nr_pages * PAGE_SIZE;
+        int ret;
+        if (g->is_fmodel)
+                return alloc_gmmu_phys_pages(vm, order, entry);
+        /*
+         * Builds without CONFIG_ARM64 (arm32) are short on vmalloc space, so
+         * there the allocation is made without a kernel mapping.
+         */
+        if (!IS_ENABLED(CONFIG_ARM64))
+                ret = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
+                                size, &entry->mem);
+        else
+                ret = nvgpu_dma_alloc(g, size, &entry->mem);
+        if (!ret)
+                return 0;
+        /* Any allocator failure is reported to the caller as -ENOMEM. */
+        nvgpu_err(g, "memory allocation failed");
+        return -ENOMEM;
+}
+/*
+ * Allocate a phys contig region big enough for a full
+ * sized gmmu page table for the given gmmu_page_size.
+ * the whole range is zeroed so it's "invalid"/will fault.
+ *
+ * If a previous entry is supplied, its memory will be used for
+ * suballocation for this next entry too, if there is space.
+ *
+ * @vm:         VM the page table belongs to.
+ * @pgsz_idx:   page size index; selects the hi_bit/lo_bit pair of @l and is
+ *              stored in @entry->pgsz on success.
+ * @l:          MMU level description (index bit range and entry size).
+ * @entry:      entry to set up; its mem, woffset and pgsz fields are written.
+ * @prev_entry: optional previous entry whose memory may be shared when the
+ *              table is smaller than one page; may be NULL.
+ *
+ * Returns 0 on success or the negative error code from the page allocation.
+ */
+int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
+                                 enum gmmu_pgsz_gk20a pgsz_idx,
+                                 const struct gk20a_mmu_level *l,
+                                 struct gk20a_mm_entry *entry,
+                                 struct gk20a_mm_entry *prev_entry)
+{
+        int err = -ENOMEM;
+        int order;
+        struct gk20a *g = gk20a_from_vm(vm);
+        u32 bytes;
+        /*
+         * allocate enough pages for the table: order starts out as log2 of
+         * the table size in bytes (index bits plus log2 of the entry size)
+         */
+        order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
+        order += ilog2(l->entry_size);
+        bytes = 1 << order;
+        /* from here on order is in pages; negative means "less than a page" */
+        order -= PAGE_SHIFT;
+        if (order < 0 && prev_entry) {
+                /* try to suballocate from previous chunk */
+                u32 capacity = prev_entry->mem.size / bytes;
+                u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
+                /*
+                 * Slots left after the one prev_entry occupies. Computed
+                 * with a guard so that an empty or too-small previous chunk
+                 * (capacity <= prev) yields 0 instead of wrapping around to
+                 * a huge unsigned value.
+                 */
+                u32 free = (capacity > prev) ? capacity - prev - 1 : 0;
+                nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
+                                capacity, prev, free, bytes);
+                if (free) {
+                        /* share the backing memory, one slot further in */
+                        memcpy(&entry->mem, &prev_entry->mem,
+                                        sizeof(entry->mem));
+                        entry->woffset = prev_entry->woffset
+                                + bytes / sizeof(u32);
+                        err = 0;
+                }
+        }
+        if (err) {
+                /* no suballoc space */
+                order = max(0, order);
+                err = nvgpu_alloc_gmmu_pages(vm, order, entry);
+                entry->woffset = 0;
+        }
+        /*
+         * Bail out before the trace below: it reads entry->mem.priv.sgt,
+         * which is only meaningful once the entry has backing memory.
+         */
+        if (err)
+                return err;
+        nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
+                  entry,
+                  (entry->mem.priv.sgt &&
+                   entry->mem.aperture == APERTURE_SYSMEM) ?
+                  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
+                  order, entry->woffset);
+        entry->pgsz = pgsz_idx;
+        entry->mem.skip_wmb = true;
+        return 0;
+}
 /*
 * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
 * VA will be allocated for you. If addr is non-zero then the buffer will be

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index a2ed3f3a..695347bc 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -15,12 +15,150 @@
15	*/	15	*/
16		16
17	#include <nvgpu/log.h>	17	#include <nvgpu/log.h>
		18	#include <nvgpu/dma.h>
18	#include <nvgpu/gmmu.h>	19	#include <nvgpu/gmmu.h>
19	#include <nvgpu/nvgpu_mem.h>	20	#include <nvgpu/nvgpu_mem.h>
20		21
21	#include "gk20a/gk20a.h"	22	#include "gk20a/gk20a.h"
22	#include "gk20a/mm_gk20a.h"	23	#include "gk20a/mm_gk20a.h"
23		24
		25	static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
		26	struct gk20a_mm_entry *entry)
		27	{
		28	u32 num_pages = 1 << order;
		29	u32 len = num_pages * PAGE_SIZE;
		30	int err;
		31	struct page *pages;
		32	struct gk20a *g = vm->mm->g;
		33
		34	/* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */
		35
		36	pages = alloc_pages(GFP_KERNEL, order);
		37	if (!pages) {
		38	nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed");
		39	goto err_out;
		40	}
		41	entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
		42	if (!entry->mem.priv.sgt) {
		43	nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table");
		44	goto err_alloced;
		45	}
		46	err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
		47	if (err) {
		48	nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed");
		49	goto err_sg_table;
		50	}
		51	sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
		52	entry->mem.cpu_va = page_address(pages);
		53	memset(entry->mem.cpu_va, 0, len);
		54	entry->mem.size = len;
		55	entry->mem.aperture = APERTURE_SYSMEM;
		56	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
		57	sg_phys(entry->mem.priv.sgt->sgl), len);
		58
		59	return 0;
		60
		61	err_sg_table:
		62	nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
		63	err_alloced:
		64	__free_pages(pages, order);
		65	err_out:
		66	return -ENOMEM;
		67	}
		68
		69	static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
		70	struct gk20a_mm_entry *entry)
		71	{
		72	struct gk20a *g = gk20a_from_vm(vm);
		73	u32 num_pages = 1 << order;
		74	u32 len = num_pages * PAGE_SIZE;
		75	int err;
		76
		77	if (g->is_fmodel)
		78	return alloc_gmmu_phys_pages(vm, order, entry);
		79
		80	/*
		81	* On arm32 we're limited by vmalloc space, so we do not map pages by
		82	* default.
		83	*/
		84	if (IS_ENABLED(CONFIG_ARM64))
		85	err = nvgpu_dma_alloc(g, len, &entry->mem);
		86	else
		87	err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
		88	len, &entry->mem);
		89
		90
		91	if (err) {
		92	nvgpu_err(g, "memory allocation failed");
		93	return -ENOMEM;
		94	}
		95
		96	return 0;
		97	}
		98
		99	/*
		100	* Allocate a phys contig region big enough for a full
		101	* sized gmmu page table for the given gmmu_page_size.
		102	* the whole range is zeroed so it's "invalid"/will fault.
		103	*
		104	* If a previous entry is supplied, its memory will be used for
		105	* suballocation for this next entry too, if there is space.
		106	*/
		107	int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
		108	enum gmmu_pgsz_gk20a pgsz_idx,
		109	const struct gk20a_mmu_level *l,
		110	struct gk20a_mm_entry *entry,
		111	struct gk20a_mm_entry *prev_entry)
		112	{
		113	int err = -ENOMEM;
		114	int order;
		115	struct gk20a *g = gk20a_from_vm(vm);
		116	u32 bytes;
		117
		118	/* allocate enough pages for the table */
		119	order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
		120	order += ilog2(l->entry_size);
		121	bytes = 1 << order;
		122	order -= PAGE_SHIFT;
		123	if (order < 0 && prev_entry) {
		124	/* try to suballocate from previous chunk */
		125	u32 capacity = prev_entry->mem.size / bytes;
		126	u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
		127	u32 free = capacity - prev - 1;
		128
		129	nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
		130	capacity, prev, free, bytes);
		131
		132	if (free) {
		133	memcpy(&entry->mem, &prev_entry->mem,
		134	sizeof(entry->mem));
		135	entry->woffset = prev_entry->woffset
		136	+ bytes / sizeof(u32);
		137	err = 0;
		138	}
		139	}
		140
		141	if (err) {
		142	/* no suballoc space */
		143	order = max(0, order);
		144	err = nvgpu_alloc_gmmu_pages(vm, order, entry);
		145	entry->woffset = 0;
		146	}
		147
		148	nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
		149	entry,
		150	(entry->mem.priv.sgt &&
		151	entry->mem.aperture == APERTURE_SYSMEM) ?
		152	g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
		153	order, entry->woffset);
		154	if (err)
		155	return err;
		156	entry->pgsz = pgsz_idx;
		157	entry->mem.skip_wmb = true;
		158
		159	return err;
		160	}
		161
24	/*	162	/*
25	* Core GMMU map function for the kernel to use. If @addr is 0 then the GPU	163	* Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
26	* VA will be allocated for you. If addr is non-zero then the buffer will be	164	* VA will be allocated for you. If addr is non-zero then the buffer will be