author		Konsta Holtta <kholtta@nvidia.com>	2016-07-07 06:44:39 -0400
committer	Vijayakumar Subbu <vsubbu@nvidia.com>	2016-07-14 02:39:09 -0400
commit		7844397404b66df8952df42218c7907fd510e55d (patch)
tree		8008a3a8fda5ee91adf3262b8a063fbcc92b2c3d /drivers
parent		dc137541b032906e6db45e2f9853fbcff5e267a5 (diff)
gpu: nvgpu: handle map/unmap for vidmem gmmu pages

If page tables are allocated from vidmem, CPU cache flushing doesn't
make sense, so skip it. Also unify the map/unmap actions, going through
the common gk20a_mem_begin()/gk20a_mem_end() helpers, when the pages are
not already mapped.
Jira DNVGPU-20
Change-Id: I36b22749aab99a7bae26c869075f8073eab0f860
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1178830
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
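For reference, a condensed sketch of the two helpers as they stand after
this change, per the diff below (gk20a_dbg_fn() tracing omitted; the
vidmem check short-circuits the ARM64 cache-flush path, and the
non-ARM64 path defers to gk20a_mem_begin()/gk20a_mem_end() instead of
open-coding vmap()/vunmap()):

int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
{
        if (tegra_platform_is_linsim())
                return map_gmmu_phys_pages(entry);

        if (IS_ENABLED(CONFIG_ARM64)) {
                /* Vidmem-backed tables have no CPU mapping to flush. */
                if (entry->mem.aperture == APERTURE_VIDMEM)
                        return 0;

                FLUSH_CPU_DCACHE(entry->mem.cpu_va,
                                 sg_phys(entry->mem.sgt->sgl),
                                 entry->mem.size);
        } else {
                /* Common helper maps the memory for CPU access. */
                int err = gk20a_mem_begin(g, &entry->mem);

                if (err)
                        return err;
        }

        return 0;
}

void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
{
        if (IS_ENABLED(CONFIG_ARM64)) {
                /* Nothing to flush for vidmem. */
                if (entry->mem.aperture == APERTURE_VIDMEM)
                        return;

                FLUSH_CPU_DCACHE(entry->mem.cpu_va,
                                 sg_phys(entry->mem.sgt->sgl),
                                 entry->mem.size);
        } else {
                gk20a_mem_end(g, &entry->mem);
        }
}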
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	33
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	4
2 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index adce734c..7e4adc5b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1016,31 +1016,31 @@ void free_gmmu_pages(struct vm_gk20a *vm,
 			  &entry->mem);
 }
 
-int map_gmmu_pages(struct gk20a_mm_entry *entry)
+int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 {
-	int count = PAGE_ALIGN(entry->mem.size) >> PAGE_SHIFT;
-	struct page **pages;
 	gk20a_dbg_fn("");
 
 	if (tegra_platform_is_linsim())
 		return map_gmmu_phys_pages(entry);
 
 	if (IS_ENABLED(CONFIG_ARM64)) {
+		if (entry->mem.aperture == APERTURE_VIDMEM)
+			return 0;
+
 		FLUSH_CPU_DCACHE(entry->mem.cpu_va,
 				 sg_phys(entry->mem.sgt->sgl),
 				 entry->mem.size);
 	} else {
-		pages = entry->mem.pages;
-		entry->mem.cpu_va = vmap(pages, count, 0,
-				pgprot_writecombine(PAGE_KERNEL));
-		if (!entry->mem.cpu_va)
-			return -ENOMEM;
+		int err = gk20a_mem_begin(g, &entry->mem);
+
+		if (err)
+			return err;
 	}
 
 	return 0;
 }
 
-void unmap_gmmu_pages(struct gk20a_mm_entry *entry)
+void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 {
 	gk20a_dbg_fn("");
 
@@ -1050,12 +1050,14 @@ void unmap_gmmu_pages(struct gk20a_mm_entry *entry)
 	}
 
 	if (IS_ENABLED(CONFIG_ARM64)) {
+		if (entry->mem.aperture == APERTURE_VIDMEM)
+			return;
+
 		FLUSH_CPU_DCACHE(entry->mem.cpu_va,
 				 sg_phys(entry->mem.sgt->sgl),
 				 entry->mem.size);
 	} else {
-		vunmap(entry->mem.cpu_va);
-		entry->mem.cpu_va = NULL;
+		gk20a_mem_end(g, &entry->mem);
 	}
 }
 
@@ -3019,6 +3021,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			       bool priv,
 			       enum gk20a_aperture aperture)
 {
+	struct gk20a *g = gk20a_from_vm(vm);
 	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
 	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
 	int err = 0;
@@ -3071,7 +3074,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 
 	if (next_l->update_entry) {
 		/* get cpu access to the ptes */
-		err = map_gmmu_pages(next_pte);
+		err = map_gmmu_pages(g, next_pte);
 		if (err) {
 			gk20a_err(dev_from_vm(vm),
 				  "couldn't map ptes for update as=%d",
@@ -3087,7 +3090,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 				next,
 				kind_v, ctag, cacheable, unmapped_pte,
 				rw_flag, sparse, lvl+1, priv, aperture);
-		unmap_gmmu_pages(next_pte);
+		unmap_gmmu_pages(g, next_pte);
 
 		if (err)
 			return err;
@@ -3162,7 +3165,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
 		   pgsz_idx, gpu_va, gpu_end-1, iova);
-	err = map_gmmu_pages(&vm->pdb);
+	err = map_gmmu_pages(g, &vm->pdb);
 	if (err) {
 		gk20a_err(dev_from_vm(vm),
 			  "couldn't map ptes for update as=%d",
@@ -3177,7 +3180,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				kind_v, &ctag,
 				cacheable, unmapped_pte, rw_flag, sparse, 0, priv,
 				aperture);
-	unmap_gmmu_pages(&vm->pdb);
+	unmap_gmmu_pages(g, &vm->pdb);
 
 	smp_mb();
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 4b811ddf..2e9172c7 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -742,8 +742,8 @@ int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
 int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev,
 			   u64 offset, struct gk20a_buffer_state **state);
 
-int map_gmmu_pages(struct gk20a_mm_entry *entry);
-void unmap_gmmu_pages(struct gk20a_mm_entry *entry);
+int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry);
+void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry);
 void pde_range_from_vaddr_range(struct vm_gk20a *vm,
 				u64 addr_lo, u64 addr_hi,
 				u32 *pde_lo, u32 *pde_hi);