3 files changed, 47 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index edf1d548..b5ef3f0d 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -234,7 +234,11 @@ struct gpu_ops {
        } gr_ctx;
        struct {
                int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
+                               u32 num_pages, u32 pgsz_idx, bool refplus);
+                int (*put_empty)(struct vm_gk20a *vm, u64 vaddr,
                               u32 num_pages, u32 pgsz_idx);
+                void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
+                               u64 size, u32 pgsz_idx);
        } mm;
        struct {
                int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 57c61d51..93a29b13 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -100,7 +100,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                                   u64 first_vaddr, u64 last_vaddr,
                                   u8 kind_v, u32 ctag_offset, bool cacheable,
                                   int rw_flag);
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
 static int gk20a_init_system_vm(struct mm_gk20a *mm);
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
@@ -444,7 +443,7 @@ err_out:
        return -ENOMEM;
 }
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
                            struct sg_table *sgt, u32 order,
                            size_t size)
 {
@@ -534,7 +533,7 @@ err_out:
        return -ENOMEM;
 }
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
                            struct sg_table *sgt, u32 order,
                            size_t size)
 {
@@ -1865,7 +1864,7 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
   made.  So, superfluous updates will cause unnecessary
   pde invalidations.
 */
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 {
        bool small_valid, big_valid;
        u64 pte_addr[2] = {0, 0};
@@ -1882,6 +1881,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
        if (small_valid)
                pte_addr[gmmu_page_size_small] =
                        gk20a_mm_iova_addr(small_pte->sgt->sgl);
        if (big_valid)
                pte_addr[gmmu_page_size_big] =
                        gk20a_mm_iova_addr(big_pte->sgt->sgl);
@@ -1920,7 +1920,6 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
        vm->tlb_dirty  = true;
 }
 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
                               u32 num_pages, u32 pgsz_idx)
 {
@@ -1986,6 +1985,18 @@ err_unmap:
        return -EINVAL;
 }
+static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
+                               u32 num_pages, u32 pgsz_idx, bool refplus)
+{
+        return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
+}
+void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
+                               u64 size, u32 pgsz_idx) {
+        __locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
+                                false, gk20a_mem_flag_none);
+}
 /* NOTE! mapped_buffers lock must be held */
 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
@@ -2000,8 +2011,18 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
                        gmmu_page_shifts[pgsz_idx];
                /* there is little we can do if this fails... */
-                g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);
+                if (g->ops.mm.put_empty) {
+                        g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
+                } else {
+                        __locked_gmmu_unmap(vm,
+                                mapped_buffer->addr,
+                                mapped_buffer->size,
+                                mapped_buffer->pgsz_idx,
+                                mapped_buffer->va_allocated,
+                                gk20a_mem_flag_none);
+                        g->ops.mm.set_sparse(vm, vaddr,
+                                        num_pages, pgsz_idx, false);
+                }
        } else
                __locked_gmmu_unmap(vm,
                                mapped_buffer->addr,
@@ -2328,7 +2349,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
        /* mark that we need to use sparse mappings here */
        if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
                err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,
-                                         pgsz_idx);
+                                         pgsz_idx, true);
                if (err) {
                        mutex_unlock(&vm->update_gmmu_lock);
                        vma->free(vma, start_page_nr, args->pages);
@@ -2357,6 +2378,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
        struct gk20a_allocator *vma;
        struct vm_gk20a *vm = as_share->vm;
        struct vm_reserved_va_node *va_node;
+        struct gk20a *g = gk20a_from_vm(vm);
        gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
                        args->pages, args->offset);
@@ -2400,12 +2422,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                /* if this was a sparse mapping, free the va */
                if (va_node->sparse)
-                        __locked_gmmu_unmap(vm,
+                        g->ops.mm.clear_sparse(vm,
-                                va_node->vaddr_start,
+                                        va_node->vaddr_start,
-                                va_node->size,
+                                        va_node->size,
-                                va_node->pgsz_idx,
+                                        va_node->pgsz_idx);
-                                false,
-                                gk20a_mem_flag_none);
                kfree(va_node);
        }
        mutex_unlock(&vm->update_gmmu_lock);
@@ -3088,6 +3108,8 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
 void gk20a_init_mm(struct gpu_ops *gops)
 {
-        gops->mm.set_sparse = gk20a_vm_put_empty;
+        gops->mm.set_sparse = gk20a_vm_put_sparse;
+        gops->mm.put_empty = gk20a_vm_put_empty;
+        gops->mm.clear_sparse = gk20a_vm_clear_sparse;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index da19f83e..b8726c62 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -524,6 +524,12 @@ int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
 int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
                                        enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
                                        struct page_table_gk20a *pte);
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+                            struct sg_table *sgt, u32 order,
+                            size_t size);
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
 struct gpu_ops;
 void gk20a_init_mm(struct gpu_ops *gops);
 #endif /*_MM_GK20A_H_ */

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index edf1d548..b5ef3f0d 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -234,7 +234,11 @@ struct gpu_ops {
234	} gr_ctx;	234	} gr_ctx;
235	struct {	235	struct {
236	int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,	236	int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
		237	u32 num_pages, u32 pgsz_idx, bool refplus);
		238	int (*put_empty)(struct vm_gk20a *vm, u64 vaddr,
237	u32 num_pages, u32 pgsz_idx);	239	u32 num_pages, u32 pgsz_idx);
		240	void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
		241	u64 size, u32 pgsz_idx);
238	} mm;	242	} mm;
239	struct {	243	struct {
240	int (*prepare_ucode)(struct gk20a *g);	244	int (*prepare_ucode)(struct gk20a *g);


diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 57c61d51..93a29b13 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -100,7 +100,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
100	u64 first_vaddr, u64 last_vaddr,	100	u64 first_vaddr, u64 last_vaddr,
101	u8 kind_v, u32 ctag_offset, bool cacheable,	101	u8 kind_v, u32 ctag_offset, bool cacheable,
102	int rw_flag);	102	int rw_flag);
103	static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
104	static void gk20a_vm_remove_support(struct vm_gk20a *vm);	103	static void gk20a_vm_remove_support(struct vm_gk20a *vm);
105	static int gk20a_init_system_vm(struct mm_gk20a *mm);	104	static int gk20a_init_system_vm(struct mm_gk20a *mm);
106	static int gk20a_init_bar1_vm(struct mm_gk20a *mm);	105	static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
@@ -444,7 +443,7 @@ err_out:
444	return -ENOMEM;	443	return -ENOMEM;
445	}	444	}
446		445
447	static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,	446	void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
448	struct sg_table *sgt, u32 order,	447	struct sg_table *sgt, u32 order,
449	size_t size)	448	size_t size)
450	{	449	{
@@ -534,7 +533,7 @@ err_out:
534	return -ENOMEM;	533	return -ENOMEM;
535	}	534	}
536		535
537	static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,	536	void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
538	struct sg_table *sgt, u32 order,	537	struct sg_table *sgt, u32 order,
539	size_t size)	538	size_t size)
540	{	539	{
@@ -1865,7 +1864,7 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
1865	made. So, superfluous updates will cause unnecessary	1864	made. So, superfluous updates will cause unnecessary
1866	pde invalidations.	1865	pde invalidations.
1867	*/	1866	*/
1868	static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)	1867	void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1869	{	1868	{
1870	bool small_valid, big_valid;	1869	bool small_valid, big_valid;
1871	u64 pte_addr[2] = {0, 0};	1870	u64 pte_addr[2] = {0, 0};
@@ -1882,6 +1881,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1882	if (small_valid)	1881	if (small_valid)
1883	pte_addr[gmmu_page_size_small] =	1882	pte_addr[gmmu_page_size_small] =
1884	gk20a_mm_iova_addr(small_pte->sgt->sgl);	1883	gk20a_mm_iova_addr(small_pte->sgt->sgl);
		1884
1885	if (big_valid)	1885	if (big_valid)
1886	pte_addr[gmmu_page_size_big] =	1886	pte_addr[gmmu_page_size_big] =
1887	gk20a_mm_iova_addr(big_pte->sgt->sgl);	1887	gk20a_mm_iova_addr(big_pte->sgt->sgl);
@@ -1920,7 +1920,6 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1920	vm->tlb_dirty = true;	1920	vm->tlb_dirty = true;
1921	}	1921	}
1922		1922
1923
1924	static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,	1923	static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1925	u32 num_pages, u32 pgsz_idx)	1924	u32 num_pages, u32 pgsz_idx)
1926	{	1925	{
@@ -1986,6 +1985,18 @@ err_unmap:
1986	return -EINVAL;	1985	return -EINVAL;
1987	}	1986	}
1988		1987
		1988	static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
		1989	u32 num_pages, u32 pgsz_idx, bool refplus)
		1990	{
		1991	return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
		1992	}
		1993
		1994	void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
		1995	u64 size, u32 pgsz_idx) {
		1996	__locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
		1997	false, gk20a_mem_flag_none);
		1998	}
		1999
1989	/* NOTE! mapped_buffers lock must be held */	2000	/* NOTE! mapped_buffers lock must be held */
1990	static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)	2001	static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
1991	{	2002	{
@@ -2000,8 +2011,18 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
2000	gmmu_page_shifts[pgsz_idx];	2011	gmmu_page_shifts[pgsz_idx];
2001		2012
2002	/* there is little we can do if this fails... */	2013	/* there is little we can do if this fails... */
2003	g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);	2014	if (g->ops.mm.put_empty) {
2004		2015	g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
		2016	} else {
		2017	__locked_gmmu_unmap(vm,
		2018	mapped_buffer->addr,
		2019	mapped_buffer->size,
		2020	mapped_buffer->pgsz_idx,
		2021	mapped_buffer->va_allocated,
		2022	gk20a_mem_flag_none);
		2023	g->ops.mm.set_sparse(vm, vaddr,
		2024	num_pages, pgsz_idx, false);
		2025	}
2005	} else	2026	} else
2006	__locked_gmmu_unmap(vm,	2027	__locked_gmmu_unmap(vm,
2007	mapped_buffer->addr,	2028	mapped_buffer->addr,
@@ -2328,7 +2349,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2328	/* mark that we need to use sparse mappings here */	2349	/* mark that we need to use sparse mappings here */
2329	if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {	2350	if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
2330	err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,	2351	err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,
2331	pgsz_idx);	2352	pgsz_idx, true);
2332	if (err) {	2353	if (err) {
2333	mutex_unlock(&vm->update_gmmu_lock);	2354	mutex_unlock(&vm->update_gmmu_lock);
2334	vma->free(vma, start_page_nr, args->pages);	2355	vma->free(vma, start_page_nr, args->pages);
@@ -2357,6 +2378,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2357	struct gk20a_allocator *vma;	2378	struct gk20a_allocator *vma;
2358	struct vm_gk20a *vm = as_share->vm;	2379	struct vm_gk20a *vm = as_share->vm;
2359	struct vm_reserved_va_node *va_node;	2380	struct vm_reserved_va_node *va_node;
		2381	struct gk20a *g = gk20a_from_vm(vm);
2360		2382
2361	gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,	2383	gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
2362	args->pages, args->offset);	2384	args->pages, args->offset);
@@ -2400,12 +2422,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2400		2422
2401	/* if this was a sparse mapping, free the va */	2423	/* if this was a sparse mapping, free the va */
2402	if (va_node->sparse)	2424	if (va_node->sparse)
2403	__locked_gmmu_unmap(vm,	2425	g->ops.mm.clear_sparse(vm,
2404	va_node->vaddr_start,	2426	va_node->vaddr_start,
2405	va_node->size,	2427	va_node->size,
2406	va_node->pgsz_idx,	2428	va_node->pgsz_idx);
2407	false,
2408	gk20a_mem_flag_none);
2409	kfree(va_node);	2429	kfree(va_node);
2410	}	2430	}
2411	mutex_unlock(&vm->update_gmmu_lock);	2431	mutex_unlock(&vm->update_gmmu_lock);
@@ -3088,6 +3108,8 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
3088		3108
3089	void gk20a_init_mm(struct gpu_ops *gops)	3109	void gk20a_init_mm(struct gpu_ops *gops)
3090	{	3110	{
3091	gops->mm.set_sparse = gk20a_vm_put_empty;	3111	gops->mm.set_sparse = gk20a_vm_put_sparse;
		3112	gops->mm.put_empty = gk20a_vm_put_empty;
		3113	gops->mm.clear_sparse = gk20a_vm_clear_sparse;
3092	}	3114	}
3093		3115


diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index da19f83e..b8726c62 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -524,6 +524,12 @@ int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
524	int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,	524	int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
525	enum gmmu_pgsz_gk20a gmmu_pgsz_idx,	525	enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
526	struct page_table_gk20a *pte);	526	struct page_table_gk20a *pte);
		527
		528	void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
		529	struct sg_table *sgt, u32 order,
		530	size_t size);
		531	void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
		532
527	struct gpu_ops;	533	struct gpu_ops;
528	void gk20a_init_mm(struct gpu_ops *gops);	534	void gk20a_init_mm(struct gpu_ops *gops);
529	#endif /*_MM_GK20A_H_ */	535	#endif /*_MM_GK20A_H_ */