summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c45
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h11
-rw-r--r--drivers/gpu/nvgpu/gm20b/mm_gm20b.c2
3 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 949237b1..09948a25 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1811,6 +1811,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			goto clean_up;
 		}
 
+	BUG_ON(!pte_kv_cur);
+
 	gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
 	for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
 		if (likely(sgt)) {
@@ -2128,9 +2130,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		char *name)
 {
 	int err, i;
-	u32 num_pages, low_hole_pages;
+	u32 num_small_pages, num_large_pages, low_hole_pages;
 	char alloc_name[32];
-	u64 vma_size;
+	u64 small_vma_size, large_vma_size;
+	u32 pde_pages;
 
 	/* note: keep the page sizes sorted lowest to highest here */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };
@@ -2206,7 +2209,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		   name, vm->va_limit, vm->pdes.num_pdes);
 
 	/* allocate the page table directory */
-	err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
+	pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512);
+
+	gk20a_dbg(gpu_dbg_pte, "Allocating %d ** 2 PDE pages\n", pde_pages);
+	err = alloc_gmmu_pages(vm, pde_pages, &vm->pdes.ref,
 			&vm->pdes.sgt, &vm->pdes.size);
 	if (err)
 		goto clean_up_pdes;
@@ -2220,13 +2226,15 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		   vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl));
 	/* we could release vm->pdes.kv but it's only one page... */
 
-	/* low-half: alloc small pages */
-	/* high-half: alloc big pages */
-	vma_size = vm->va_limit;
-	if (big_pages)
-		vma_size /= 2;
+	/* First 16GB of the address space goes towards small pages. What ever
+	 * remains is allocated to large pages. */
+	small_vma_size = vm->va_limit;
+	if (big_pages) {
+		small_vma_size = (u64)16 << 30;
+		large_vma_size = vm->va_limit - small_vma_size;
+	}
 
-	num_pages = (u32)(vma_size >>
+	num_small_pages = (u32)(small_vma_size >>
 		   ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
 
 	/* num_pages above is without regard to the low-side hole. */
@@ -2238,20 +2246,22 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
 			alloc_name,
 			low_hole_pages,		 /*start*/
-			num_pages - low_hole_pages);/* length*/
+			num_small_pages - low_hole_pages);/* length*/
 	if (err)
 		goto clean_up_map_pde;
 
 	if (big_pages) {
-		num_pages = (u32)((vm->va_limit / 2) >>
+		u32 start = (u32)(small_vma_size >>
+			   ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+		num_large_pages = (u32)(large_vma_size >>
 			   ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
 
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
 			 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
 		err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
 				alloc_name,
-				num_pages, /* start */
-				num_pages); /* length */
+				start, /* start */
+				num_large_pages); /* length */
 		if (err)
 			goto clean_up_small_allocator;
 	}
@@ -2269,7 +2279,7 @@ clean_up_small_allocator:
 clean_up_map_pde:
 	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
 clean_up_ptes:
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages,
 			vm->pdes.size);
 clean_up_pdes:
 	kfree(vm->pdes.ptes[gmmu_page_size_small]);
@@ -2647,10 +2657,15 @@ int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
 
 static void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
+	u32 pde_pages;
+
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+
 	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+
+	pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512);
+	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages,
 			vm->pdes.size);
 	kfree(vm->pdes.ptes[gmmu_page_size_small]);
 	kfree(vm->pdes.ptes[gmmu_page_size_big]);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 04f9446b..d39dcff0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -25,10 +25,13 @@
 #include <asm/cacheflush.h>
 #include "gk20a_allocator.h"
 
-/* For now keep the size relatively small-ish compared to the full
- * 40b va. 32GB for now. It consists of two 16GB spaces. */
-#define NV_GMMU_VA_RANGE	35ULL
-#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1)))
+/*
+ * Amount of the GVA space we actually use is smaller than the available space.
+ * The bottom 16GB of the space are used for small pages, the remaining high
+ * memory is for large pages.
+ */
+#define NV_GMMU_VA_RANGE	37ULL
+#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)SZ_1G * 16))
 
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 5b1a9a04..1adff5ab 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -111,7 +111,7 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 	gk20a_dbg_fn("");
 
-	vaddr_hi = vaddr + pgsz * num_pages - 1;
+	vaddr_hi = vaddr + pgsz * (u64)num_pages - 1;
 	pde_range_from_vaddr_range(vm,
 			vaddr,
 			vaddr_hi,