author    Kevin Huang <kevinh@nvidia.com>        2014-07-29 18:56:17 -0400
committer Dan Willemsen <dwillemsen@nvidia.com>  2015-03-18 15:10:52 -0400
commit    62e80a189cfa0b6dbb9e27712a1c782e953c32f4
tree      3c8f95d9af8ab26d792e765b64547a7df4273e60
parent    4439a8e311e09b1f3af1a70d4111c75e002a607d
gpu: nvgpu: clear sparse in space free
Gk20a unmaps the addresses bound to the dummy page to clear sparse
mappings. On Gm20b, we additionally need to free the page table
entries that were allocated for the sparse memory.

Bug 1538384

Change-Id: Ie2409ab016c29f42c5f7d97dd7287b093b47f9df
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
Reviewed-on: http://git-master/r/448645
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
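For orientation, the change reshapes three chip-level mm ops. The sketch
below shows their shapes as inferred from the call sites and assignments
in the hunks that follow; the enclosing struct name and layout are
assumptions for illustration, not taken from this diff (the real
definition lives elsewhere in the driver):

/* Sketch only: op signatures implied by this patch. */
struct mm_ops_sketch {
	/* now takes a refplus flag: true when the sparse range is first
	 * allocated, false when sparse state is re-established on unmap */
	int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
			  u32 num_pages, u32 pgsz_idx, bool refplus);
	/* re-points a range at the dummy page (gk20a behavior) */
	int (*put_empty)(struct vm_gk20a *vm, u64 vaddr,
			 u32 num_pages, u32 pgsz_idx);
	/* new: tears down sparse state at space-free time */
	void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
			     u64 size, u32 pgsz_idx);
};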
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 52 +++++++++++++++++++++++++++++++++++++---------------
1 file changed, 37 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 57c61d51..93a29b13 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -100,7 +100,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
 				   int rw_flag);
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
 static int gk20a_init_system_vm(struct mm_gk20a *mm);
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
@@ -444,7 +443,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 			    struct sg_table *sgt, u32 order,
 			    size_t size)
 {
@@ -534,7 +533,7 @@ err_out:
 	return -ENOMEM;
 }
 
-static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 			    struct sg_table *sgt, u32 order,
 			    size_t size)
 {
@@ -1865,7 +1864,7 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
    made. So, superfluous updates will cause unnecessary
    pde invalidations.
 */
-static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
+void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 {
 	bool small_valid, big_valid;
 	u64 pte_addr[2] = {0, 0};
@@ -1882,6 +1881,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	if (small_valid)
 		pte_addr[gmmu_page_size_small] =
 			gk20a_mm_iova_addr(small_pte->sgt->sgl);
+
 	if (big_valid)
 		pte_addr[gmmu_page_size_big] =
 			gk20a_mm_iova_addr(big_pte->sgt->sgl);
@@ -1920,7 +1920,6 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	vm->tlb_dirty = true;
 }
 
-
 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 			      u32 num_pages, u32 pgsz_idx)
 {
@@ -1986,6 +1985,18 @@ err_unmap:
 	return -EINVAL;
 }
 
+static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
+			       u32 num_pages, u32 pgsz_idx, bool refplus)
+{
+	return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
+}
+
+void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
+			   u64 size, u32 pgsz_idx) {
+	__locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
+			    false, gk20a_mem_flag_none);
+}
+
 /* NOTE! mapped_buffers lock must be held */
 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
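On gk20a, clearing sparse is thus just an unmap of the dummy-page
mapping. The gm20b counterpart motivating this change (landed in a
separate patch, not shown here) would also release the page tables
backing the range; a hypothetical sketch, with the function name and
the freeing step as assumptions:

/* Hypothetical gm20b-style clear_sparse, not code from this patch.
 * free_gmmu_pages() and update_gmmu_pde_locked() are made non-static
 * above precisely so chip code can do this kind of teardown. */
static void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
				  u64 size, u32 pgsz_idx)
{
	/* drop the dummy-page mapping first, as gk20a does */
	__locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
			    false, gk20a_mem_flag_none);
	/* ... then walk the PDEs covering [vaddr, vaddr + size), free
	 * each PTE allocation with free_gmmu_pages(), and refresh the
	 * directory entries with update_gmmu_pde_locked() ... */
}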
@@ -2000,8 +2011,18 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 			gmmu_page_shifts[pgsz_idx];
 
 		/* there is little we can do if this fails... */
-		g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);
-
+		if (g->ops.mm.put_empty) {
+			g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
+		} else {
+			__locked_gmmu_unmap(vm,
+				mapped_buffer->addr,
+				mapped_buffer->size,
+				mapped_buffer->pgsz_idx,
+				mapped_buffer->va_allocated,
+				gk20a_mem_flag_none);
+			g->ops.mm.set_sparse(vm, vaddr,
+					num_pages, pgsz_idx, false);
+		}
 	} else
 		__locked_gmmu_unmap(vm,
 			mapped_buffer->addr,
@@ -2328,7 +2349,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	/* mark that we need to use sparse mappings here */
 	if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
 		err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,
-					   pgsz_idx);
+					   pgsz_idx, true);
 		if (err) {
 			mutex_unlock(&vm->update_gmmu_lock);
 			vma->free(vma, start_page_nr, args->pages);
@@ -2357,6 +2378,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct vm_reserved_va_node *va_node;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
 		     args->pages, args->offset);
@@ -2400,12 +2422,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 
 		/* if this was a sparse mapping, free the va */
 		if (va_node->sparse)
-			__locked_gmmu_unmap(vm,
-					va_node->vaddr_start,
-					va_node->size,
-					va_node->pgsz_idx,
-					false,
-					gk20a_mem_flag_none);
+			g->ops.mm.clear_sparse(vm,
+					va_node->vaddr_start,
+					va_node->size,
+					va_node->pgsz_idx);
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -3088,6 +3108,8 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
 
 void gk20a_init_mm(struct gpu_ops *gops)
 {
-	gops->mm.set_sparse = gk20a_vm_put_empty;
+	gops->mm.set_sparse = gk20a_vm_put_sparse;
+	gops->mm.put_empty = gk20a_vm_put_empty;
+	gops->mm.clear_sparse = gk20a_vm_clear_sparse;
 }
 
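With the hooks split this way, a chip that needs the heavier teardown
only has to install different handlers. A hypothetical gm20b init,
continuing the sketch above (none of the gm20b names appear in this
patch):

void gm20b_init_mm(struct gpu_ops *gops)
{
	/* refplus-aware allocation-side handler (hypothetical name) */
	gops->mm.set_sparse = gm20b_vm_put_sparse;
	/* frees page tables as well as unmapping; see sketch above */
	gops->mm.clear_sparse = gm20b_vm_clear_sparse;
	/* put_empty left NULL: gk20a_vm_unmap_locked() then takes the
	 * __locked_gmmu_unmap() + set_sparse(..., false) fallback path */
}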