Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/gmmu.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c  81
1 file changed, 38 insertions, 43 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index ec1bc095..602dfb3b 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -45,7 +45,8 @@ static int pd_allocate(struct vm_gk20a *vm,
 		       struct nvgpu_gmmu_pd *pd,
 		       const struct gk20a_mmu_level *l,
 		       struct nvgpu_gmmu_attrs *attrs);
-
+static u32 pd_size(const struct gk20a_mmu_level *l,
+		   struct nvgpu_gmmu_attrs *attrs);
 /*
  * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
  * VA will be allocated for you. If addr is non-zero then the buffer will be
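
Note on the context above: the comment describes the mapping entry point's VA convention (pass addr == 0 to have a GPU VA allocated, or a non-zero addr to map at a fixed GPU VA). Below is a minimal, self-contained sketch of just that convention; map_fixed_or_alloc() and its toy VA allocator are hypothetical stand-ins, not the real nvgpu_gmmu_map() API.

#include <stdint.h>
#include <stdio.h>

static uint64_t next_free_va = 0x200000;	/* toy GPU VA allocator state */

/* Hypothetical helper: addr == 0 means "allocate a GPU VA for me",
 * non-zero means "map exactly at this GPU VA". */
static uint64_t map_fixed_or_alloc(uint64_t addr, uint64_t size)
{
	if (addr == 0) {
		addr = next_free_va;
		next_free_va += size;
	}
	return addr;
}

int main(void)
{
	printf("allocated VA: 0x%llx\n",
	       (unsigned long long)map_fixed_or_alloc(0, 0x1000));
	printf("fixed VA:     0x%llx\n",
	       (unsigned long long)map_fixed_or_alloc(0x40000000, 0x1000));
	return 0;
}
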
@@ -138,6 +139,9 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
 
 int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 {
+	u32 pdb_size;
+	int err;
+
 	/*
 	 * Need this just for page size. Everything else can be ignored. Also
 	 * note that we can just use pgsz 0 (i.e small pages) since the number
@@ -148,56 +152,43 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 		.pgsz = 0,
 	};
 
-	return pd_allocate(vm, &vm->pdb, &vm->mmu_levels[0], &attrs);
-}
+	/*
+	 * PDB size here must be one page so that its address is page size
+	 * aligned. Although lower PDE tables can be aligned at 256B boundaries
+	 * the main PDB must be page aligned.
+	 */
+	pdb_size = ALIGN(pd_size(&vm->mmu_levels[0], &attrs), PAGE_SIZE);
+
+	err = __nvgpu_pd_cache_alloc_direct(vm->mm->g, &vm->pdb, pdb_size);
+	if (WARN_ON(err))
+		return err;
 
+	/*
+	 * One mb() is done after all mapping operations. Don't need individual
+	 * barriers for each PD write.
+	 */
+	vm->pdb.mem->skip_wmb = true;
+
+	return 0;
+}
 
 /*
  * Ensure that there's a CPU mapping for the page directory memory. This won't
  * always be the case for 32 bit systems since we may need to save kernel
  * virtual memory.
  */
-static int map_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *entry)
+static int map_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
 {
-	return nvgpu_mem_begin(g, &entry->mem);
+	return nvgpu_mem_begin(g, pd->mem);
 }
 
 /*
  * Handle any necessary CPU unmap semantics for a page directories DMA memory.
  * For 64 bit platforms this is a noop.
  */
-static void unmap_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *entry)
-{
-	nvgpu_mem_end(g, &entry->mem);
-}
-
-static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 bytes,
-				  struct nvgpu_gmmu_pd *pd)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	unsigned long flags = NVGPU_DMA_FORCE_CONTIGUOUS;
-	int err;
-
-	/*
-	 * On arm32 vmalloc space is a precious commodity so we do not map pages
-	 * by default.
-	 */
-	if (!IS_ENABLED(CONFIG_ARM64))
-		flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
-
-	err = nvgpu_dma_alloc_flags(g, flags, bytes, &pd->mem);
-	if (err)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
-			   struct nvgpu_gmmu_pd *pd)
+static void unmap_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
 {
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	nvgpu_dma_free(g, &pd->mem);
+	nvgpu_mem_end(g, pd->mem);
 }
 
 /*
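
Note on the init path in the hunk above: the top-level PDB allocation is rounded up to a full page with ALIGN() before being handed to __nvgpu_pd_cache_alloc_direct(), since only the root PDB must be page aligned while lower PDE tables can sit on 256B boundaries. A minimal userspace sketch of that rounding, assuming the usual kernel ALIGN() semantics (round up to a power-of-two boundary) and a 4K page; the sizes are hypothetical, the real ones come from pd_size(l, attrs).

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define ALIGN(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))	/* 'a' must be a power of two */

int main(void)
{
	/* Hypothetical pd_size() results for illustration only. */
	unsigned long sizes[] = { 256UL, 4096UL, 4097UL, 8192UL };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("pd_size = %5lu -> pdb_size = %5lu\n",
		       sizes[i], ALIGN(sizes[i], PAGE_SIZE));
	return 0;
}
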
@@ -205,10 +196,14 @@ void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
  */
 u64 nvgpu_pde_phys_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
 {
+	u64 page_addr;
+
 	if (g->mm.has_physical_mode)
-		return sg_phys(pd->mem.priv.sgt->sgl);
+		page_addr = sg_phys(pd->mem->priv.sgt->sgl);
 	else
-		return nvgpu_mem_get_base_addr(g, &pd->mem, 0);
+		page_addr = nvgpu_mem_get_base_addr(g, pd->mem, 0);
+
+	return page_addr + pd->mem_offs;
 }
 
 /*
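
Note on nvgpu_pde_phys_addr() in the hunk above: a page directory now references a shared backing allocation (pd->mem is a pointer) plus a byte offset (pd->mem_offs), so the returned address is the backing memory's base address plus that offset. A small sketch with stand-in types; fake_mem/fake_pd are hypothetical and mirror only the fields used here.

#include <stdint.h>
#include <stdio.h>

struct fake_mem { uint64_t base_addr; };                      /* stands in for nvgpu_mem */
struct fake_pd  { struct fake_mem *mem; uint32_t mem_offs; }; /* stands in for nvgpu_gmmu_pd */

static uint64_t pde_phys_addr(const struct fake_pd *pd)
{
	/* mirrors the new return: base address of the backing mem + mem_offs */
	return pd->mem->base_addr + pd->mem_offs;
}

int main(void)
{
	struct fake_mem cache_page = { .base_addr = 0x100000 };
	/* Two 256B-aligned PDs carved out of the same backing allocation. */
	struct fake_pd pd_a = { &cache_page, 0x000 };
	struct fake_pd pd_b = { &cache_page, 0x100 };

	printf("pd_a @ 0x%llx, pd_b @ 0x%llx\n",
	       (unsigned long long)pde_phys_addr(&pd_a),
	       (unsigned long long)pde_phys_addr(&pd_b));
	return 0;
}
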
@@ -254,10 +249,10 @@ static int pd_allocate(struct vm_gk20a *vm,
 {
 	int err;
 
-	if (pd->mem.size)
+	if (pd->mem)
 		return 0;
 
-	err = nvgpu_alloc_gmmu_pages(vm, pd_size(l, attrs), pd);
+	err = __nvgpu_pd_alloc(vm, pd, pd_size(l, attrs));
 	if (err) {
 		nvgpu_info(vm->mm->g, "error allocating page directory!");
 		return err;
@@ -267,7 +262,7 @@ static int pd_allocate(struct vm_gk20a *vm,
 	 * One mb() is done after all mapping operations. Don't need individual
 	 * barriers for each PD write.
 	 */
-	pd->mem.skip_wmb = true;
+	pd->mem->skip_wmb = true;
 
 	return 0;
 }
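
Note on the skip_wmb line in the hunk above: individual PD writes skip their write barrier and a single barrier is issued once the whole page-table update has been written out. A compact sketch of that pattern; pd_write() and wmb() here are stand-ins, not the nvgpu primitives.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static void wmb(void)
{
	__sync_synchronize();	/* stand-in write barrier */
}

/* Write one PD entry without a per-write barrier (the skip_wmb case). */
static void pd_write(uint32_t *pd, size_t idx, uint32_t val)
{
	pd[idx] = val;
}

static void update_page_table(uint32_t *pd, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		pd_write(pd, i, 0x1u | (uint32_t)(i << 8));

	wmb();	/* one barrier after all mapping operations */
}

int main(void)
{
	uint32_t pd[8] = { 0 };

	update_page_table(pd, 8);
	printf("pd[0]=0x%x pd[7]=0x%x\n", pd[0], pd[7]);
	return 0;
}
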
@@ -778,7 +773,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	}
 
 	if (!batch)
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	else
 		batch->need_tlb_invalidate = true;
 
@@ -830,7 +825,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 
 	if (!batch) {
 		gk20a_mm_l2_flush(g, true);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	} else {
 		if (!batch->gpu_l2_flushed) {
 			gk20a_mm_l2_flush(g, true);