author		Deepak Nibade <dnibade@nvidia.com>	2016-08-04 10:26:42 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-09-01 12:10:00 -0400
commit		c845b210129a4a2ebd8a3cd22c53dc30cad3664d (patch)
tree		41c60205ce8927ef9062aa4a257fd9d8bdf8a4d5 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent		c38cc24e1a752d6eb5b07d771ddbf6ab700f695d (diff)
gpu: nvgpu: support GMMU mappings for vidmem page allocator
Switch to use the page allocator for vidmem.

Support GMMU mappings for the page (non-contiguous) page allocator in
update_gmmu_ptes_locked(): if the aperture is VIDMEM, traverse each chunk
in an allocation and map it to the GPU VA separately.

Fix CE page clearing to support the page allocator.

Fix gk20a_pramin_enter() to get the base address from the new allocator.

Define API gk20a_mem_get_vidmem_addr() to get the base address of an
allocation. Note that this API should not be used if we have more than
one chunk.

Jira DNVGPU-96

Change-Id: I725422f3538aeb477ca4220ba57ef8b3c53db703
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1199177
(cherry picked from commit 1afae6ee6529ab88cedd5bcbe458fbdc0d4b1fd8)
Reviewed-on: http://git-master/r/1197647
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
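The core idea of the update_gmmu_ptes_locked() change is that a vidmem allocation is now a list of physically discontiguous chunks, so a GPU VA range has to be mapped one chunk at a time while consuming any initial buffer offset (space_to_skip). The standalone C sketch below models that walk with a plain array instead of the driver's gk20a_page_alloc/page_alloc_chunk structures; struct chunk, map_range(), map_chunks() and the example sizes are illustrative assumptions, not nvgpu code.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the driver's page_alloc_chunk. */
struct chunk {
	uint64_t base;   /* physical base of this chunk */
	uint64_t length; /* size of this chunk in bytes */
};

/* Stub for the per-range PTE update (update_gmmu_level_locked() in the driver). */
static void map_range(uint64_t gpu_va, uint64_t phys, uint64_t length)
{
	printf("map GPU VA 0x%llx -> PA 0x%llx, len 0x%llx\n",
	       (unsigned long long)gpu_va, (unsigned long long)phys,
	       (unsigned long long)length);
}

/*
 * Walk the chunks of a non-contiguous allocation, skip the first
 * 'space_to_skip' bytes, and map what remains chunk by chunk. This mirrors
 * the shape of the vidmem branch added to update_gmmu_ptes_locked().
 */
static void map_chunks(const struct chunk *chunks, int nr_chunks,
		       uint64_t gpu_va, uint64_t space_to_skip)
{
	for (int i = 0; i < nr_chunks; i++) {
		if (space_to_skip && space_to_skip >= chunks[i].length) {
			/* Whole chunk falls inside the skipped prefix. */
			space_to_skip -= chunks[i].length;
			continue;
		}
		map_range(gpu_va, chunks[i].base + space_to_skip,
			  chunks[i].length - space_to_skip);
		gpu_va += chunks[i].length - space_to_skip;
		space_to_skip = 0;
	}
}

int main(void)
{
	/* Example: three discontiguous 64 KiB chunks, skip the first 4 KiB. */
	struct chunk chunks[] = {
		{ 0x100000, 0x10000 },
		{ 0x300000, 0x10000 },
		{ 0x500000, 0x10000 },
	};

	map_chunks(chunks, 3, 0x40000000, 0x1000);
	return 0;
}

The same splitting also drives the CE clear path below, which issues one memset per chunk and only waits on the final fence.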
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	210
1 file changed, 149 insertions, 61 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 7c731890..dde798cf 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -29,6 +29,7 @@
 #include <linux/lcm.h>
 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
+#include <gk20a/page_allocator_priv.h>
 
 #include "gk20a.h"
 #include "mm_gk20a.h"
@@ -84,10 +85,31 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
 	mem->cpu_va = NULL;
 }
 
+static u64 gk20a_mem_get_vidmem_addr(struct gk20a *g, struct mem_desc *mem)
+{
+	struct gk20a_page_alloc *alloc;
+	struct page_alloc_chunk *chunk;
+
+	if (mem && mem->aperture == APERTURE_VIDMEM) {
+		alloc = (struct gk20a_page_alloc *)
+			sg_dma_address(mem->sgt->sgl);
+
+		/* This API should not be used with > 1 chunks */
+		if (alloc->nr_chunks != 1)
+			return 0;
+
+		chunk = list_first_entry(&alloc->alloc_chunks,
+				struct page_alloc_chunk, list_entry);
+		return chunk->base;
+	}
+
+	return 0;
+}
+
 /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
 static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 {
-	u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
+	u64 bufbase = gk20a_mem_get_vidmem_addr(g, mem);
 	u64 addr = bufbase + w * sizeof(u32);
 	u32 hi = (u32)((addr & ~(u64)0xfffff)
 		>> bus_bar0_window_target_bar0_window_base_shift_v());
@@ -765,9 +787,7 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
 		return 0;
 
 	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem",
-			SZ_4K, size - SZ_4K, SZ_4K,
-			GPU_ALLOC_FORCE_CONTIG |
-			GPU_ALLOC_NO_SCATTER_GATHER);
+			SZ_4K, size - SZ_4K, SZ_4K, 0);
 	if (err) {
 		gk20a_err(d, "Failed to register vidmem for size %zu: %d",
 				size, err);
@@ -2721,7 +2741,6 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
 #if defined(CONFIG_GK20A_VIDMEM)
 	u64 addr;
 	int err;
-	bool need_pramin_access = true;
 
 	gk20a_dbg_fn("");
 
@@ -2764,13 +2783,22 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
 
 	if (g->mm.vidmem.ce_ctx_id != ~0) {
 		struct gk20a_fence *gk20a_fence_out = NULL;
-		u64 dst_bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
+		struct gk20a_fence *gk20a_last_fence = NULL;
+		struct gk20a_page_alloc *alloc = NULL;
+		struct page_alloc_chunk *chunk = NULL;
 
-		err = gk20a_ce_execute_ops(g->dev,
+		alloc = (struct gk20a_page_alloc *)
+			g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
+
+		list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
+			if (gk20a_last_fence)
+				gk20a_fence_put(gk20a_last_fence);
+
+			err = gk20a_ce_execute_ops(g->dev,
 				g->mm.vidmem.ce_ctx_id,
 				0,
-				dst_bufbase,
-				(u64)size,
+				chunk->base,
+				chunk->length,
 				0x00000000,
 				NVGPU_CE_DST_LOCATION_LOCAL_FB,
 				NVGPU_CE_MEMSET,
@@ -2778,27 +2806,31 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
 				0,
 				&gk20a_fence_out);
 
-		if (!err) {
-			if (gk20a_fence_out) {
-				err = gk20a_fence_wait(gk20a_fence_out, gk20a_get_gr_idle_timeout(g));
-				gk20a_fence_put(gk20a_fence_out);
-				if (err)
-					gk20a_err(g->dev,
-						"Failed to get the fence_out from CE execute ops");
-				else
-					need_pramin_access = false;
+			if (err) {
+				gk20a_err(g->dev,
+					"Failed gk20a_ce_execute_ops[%d]", err);
+				goto fail_free_table;
 			}
-		} else
-			gk20a_err(g->dev, "Failed gk20a_ce_execute_ops[%d]",err);
-	}
 
-	if (need_pramin_access)
-		gk20a_memset(g, mem, 0, 0, size);
+			gk20a_last_fence = gk20a_fence_out;
+		}
+
+		if (gk20a_last_fence) {
+			err = gk20a_fence_wait(gk20a_last_fence,
+					gk20a_get_gr_idle_timeout(g));
+			gk20a_fence_put(gk20a_last_fence);
+			if (err)
+				gk20a_err(g->dev,
+					"Failed to get the fence_out from CE execute ops");
+		}
+	}
 
 	gk20a_dbg_fn("done at 0x%llx size %zu", addr, size);
 
 	return 0;
 
+fail_free_table:
+	sg_free_table(mem->sgt);
 fail_kfree:
 	kfree(mem->sgt);
 fail_physfree:
@@ -3381,13 +3413,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
 	int err;
 	struct scatterlist *sgl = NULL;
-
-	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
-		   pgsz_idx,
-		   sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0)
-		   : 0ULL,
-		   buffer_offset,
-		   sgt ? sgt->nents : 0);
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *chunk = NULL;
+	u64 length;
 
 	/* note: here we need to map kernel to small, since the
 	 * low-level mmu code assumes 0 is small and 1 is big pages */
@@ -3397,45 +3425,105 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	if (space_to_skip & (page_size - 1))
 		return -EINVAL;
 
-	if (sgt) {
-		iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
-		if (!vm->mm->bypass_smmu && iova) {
-			iova += space_to_skip;
+	err = map_gmmu_pages(g, &vm->pdb);
+	if (err) {
+		gk20a_err(dev_from_vm(vm),
+			   "couldn't map ptes for update as=%d",
+			   vm_aspace_id(vm));
+		return err;
+	}
+
+	if (aperture == APERTURE_VIDMEM) {
+		gk20a_dbg(gpu_dbg_map_v, "vidmem map size_idx=%d, gpu_va=[%llx,%llx], alloc=%llx",
+			   pgsz_idx, gpu_va, gpu_end-1, iova);
+
+		if (sgt) {
+			alloc = (struct gk20a_page_alloc *)
+				g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
+
+			list_for_each_entry(chunk, &alloc->alloc_chunks,
+							list_entry) {
+				if (space_to_skip &&
+				    space_to_skip > chunk->length) {
+					space_to_skip -= chunk->length;
+				} else {
+					iova = chunk->base + space_to_skip;
+					length = chunk->length - space_to_skip;
+					space_to_skip = 0;
+
+					err = update_gmmu_level_locked(vm,
+						&vm->pdb, pgsz_idx,
+						&sgl,
+						&space_to_skip,
+						&iova,
+						gpu_va, gpu_va + length,
+						kind_v, &ctag,
+						cacheable, unmapped_pte,
+						rw_flag, sparse, 0, priv,
+						aperture);
+
+					/* need to set explicit zero here */
+					space_to_skip = 0;
+					gpu_va += length;
+				}
+			}
 		} else {
-			sgl = sgt->sgl;
-
-			gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-				  (u64)sg_phys(sgl),
-				  sgl->length);
-			while (space_to_skip && sgl &&
-			       space_to_skip + page_size > sgl->length) {
-				space_to_skip -= sgl->length;
-				sgl = sg_next(sgl);
+			err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
+					&sgl,
+					&space_to_skip,
+					&iova,
+					gpu_va, gpu_end,
+					kind_v, &ctag,
+					cacheable, unmapped_pte, rw_flag,
+					sparse, 0, priv,
+					aperture);
+		}
+	} else {
+		gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
+			   pgsz_idx,
+			   sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0)
+			   : 0ULL,
+			   buffer_offset,
+			   sgt ? sgt->nents : 0);
+
+		gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
+			   pgsz_idx, gpu_va, gpu_end-1, iova);
+
+		if (sgt) {
+			iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
+			if (!vm->mm->bypass_smmu && iova) {
+				iova += space_to_skip;
+			} else {
+				sgl = sgt->sgl;
+
 				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
 					  (u64)sg_phys(sgl),
 					  sgl->length);
+
+				while (space_to_skip && sgl &&
+				       space_to_skip + page_size > sgl->length) {
+					space_to_skip -= sgl->length;
+					sgl = sg_next(sgl);
+					gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+						  (u64)sg_phys(sgl),
+						  sgl->length);
+				}
+
+				iova = sg_phys(sgl) + space_to_skip;
 			}
-			iova = sg_phys(sgl) + space_to_skip;
 		}
-	}
 
-	gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
-		   pgsz_idx, gpu_va, gpu_end-1, iova);
-	err = map_gmmu_pages(g, &vm->pdb);
-	if (err) {
-		gk20a_err(dev_from_vm(vm),
-			   "couldn't map ptes for update as=%d",
-			   vm_aspace_id(vm));
-		return err;
+		err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
+				&sgl,
+				&space_to_skip,
+				&iova,
+				gpu_va, gpu_end,
+				kind_v, &ctag,
+				cacheable, unmapped_pte, rw_flag,
+				sparse, 0, priv,
+				aperture);
 	}
-	err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-			&sgl,
-			&space_to_skip,
-			&iova,
-			gpu_va, gpu_end,
-			kind_v, &ctag,
-			cacheable, unmapped_pte, rw_flag, sparse, 0, priv,
-			aperture);
+
 	unmap_gmmu_pages(g, &vm->pdb);
 
 	smp_mb();