author	Alex Waterman <alexw@nvidia.com>	2015-06-17 13:31:08 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-01-31 19:23:07 -0500
commit	d630f1d99f60b1c2ec87506a2738bac4d1895b07 (patch)
tree	5b9cad58f585424a64e7b675d503a87bbcada254	/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
parent	793791ebb7ddbb34f0aaf3e300b24ed24aa76661 (diff)
gpu: nvgpu: Unify the small and large page address spaces
The basic structure of this patch is to make the small page allocator and the
large page allocator into pointers (where they used to be just structs), and
then assign each of those pointers to the same actual allocator, since the
buddy allocator has supported mixed page sizes since its inception. For the
rest of the driver, some changes had to be made in order to actually support
mixed pages in a single address space.

1. Unifying the allocation page size determination

Since the allocation and map operations happen at distinct times, both mapping
and allocation of GVA space must agree on page size. This is because
allocations of different page sizes have to be kept in separate PDEs, to avoid
the necessity of supporting mixed-page-size PDEs. To this end a function,
__get_pte_size(), was introduced; it is used both by the balloc code and the
core GPU MM code, and it determines page size based only on the length of the
mapping/allocation.

2. Fixed address allocation + page size

Similar to regular mappings/GVA allocations, fixed address mapping page size
determination had to be modified. In the past the address of the mapping
determined the page size, since the address space was split by address (low
addresses were small pages, high addresses large pages). Since that is no
longer the case, the page size field in the reserve memory ioctl is now
honored by the mapping code. When, for instance, CUDA makes a memory
reservation, it specifies small or large pages; when CUDA then requests
mappings within that address range, the page size is looked up in the
reserved memory struct.

Fixed address reservations were also modified to always allocate at PDE
granularity (64M or 128M, depending on the large page size). This prevents
non-fixed allocations from ending up in the same PDE and causing kernel
panics or GMMU faults.

3. The rest...

The rest of the changes are just by-products of the above: lots of places
required minor updates to use a pointer to the GVA allocator struct instead
of the struct itself.

Lastly, this change is not truly complete. More work remains to be done in
order to fully remove the notion that there was such a thing as separate
address spaces for different page sizes. Basically, what remains after this
patch is cleanup and proper documentation.

Bug 1396644
Bug 1729947

Change-Id: If51ab396a37ba16c69e434adb47edeef083dce57
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265300
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
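A minimal sketch of the page-size-by-length idea from point 1, assuming only
the names visible elsewhere in this patch (vm->big_pages, vm->gmmu_page_sizes[],
the gmmu_page_size_* indices); the real __get_pte_size() is not part of this
file's diff, so the body below is illustrative only:

static u32 __get_pte_size_sketch(struct vm_gk20a *vm, u64 size)
{
	/*
	 * Choose the page size from the mapping/allocation length alone:
	 * big pages only when the VM supports them and the request covers
	 * at least one full big page.
	 */
	if (vm->big_pages &&
	    size >= vm->gmmu_page_sizes[gmmu_page_size_big])
		return gmmu_page_size_big;

	return gmmu_page_size_small;
}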
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/mm_vgpu.c')
-rw-r--r--	drivers/gpu/nvgpu/vgpu/mm_vgpu.c	115
1 file changed, 49 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 66c9344b..a21a020d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -227,11 +227,12 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->fixed))
+		nvgpu_alloc_destroy(&vm->fixed);
 
 	mutex_unlock(&vm->update_gmmu_lock);
 
@@ -273,8 +274,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
-	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-		kernel_vma_start, kernel_vma_limit;
+	u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
 	char name[32];
 	int err, i;
 	const bool userspace_managed =
@@ -306,6 +306,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->mm = mm;
 	vm->as_share = as_share;
 
+	/* Set up vma pointers. */
+	vm->vma[0] = &vm->user;
+	vm->vma[1] = &vm->user;
+	vm->vma[2] = &vm->kernel;
+
 	for (i = 0; i < gmmu_nr_page_sizes; i++)
 		vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
 
@@ -328,93 +333,74 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->handle = p->handle;
 
 	/* setup vma limits */
-	small_vma_start = vm->va_start;
-
-	if (vm->big_pages) {
-		/* First 16GB of the address space goes towards small
-		 * pages. The kernel reserved pages are at the end.
-		 * What ever remains is allocated to large pages.
-		 */
-		small_vma_limit = __nv_gmmu_va_small_page_limit();
-		large_vma_start = small_vma_limit;
-		large_vma_limit = vm->va_limit - mm->channel.kernel_size;
-	} else {
-		small_vma_limit = vm->va_limit - mm->channel.kernel_size;
-		large_vma_start = 0;
-		large_vma_limit = 0;
-	}
+	user_vma_start = vm->va_start;
+	user_vma_limit = vm->va_limit - mm->channel.kernel_size;
 
 	kernel_vma_start = vm->va_limit - mm->channel.kernel_size;
 	kernel_vma_limit = vm->va_limit;
 
 	gk20a_dbg_info(
-		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-		small_vma_start, small_vma_limit,
-		large_vma_start, large_vma_limit,
+		"user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		user_vma_start, user_vma_limit,
 		kernel_vma_start, kernel_vma_limit);
 
-	/* check that starts do not exceed limits */
-	WARN_ON(small_vma_start > small_vma_limit);
-	WARN_ON(large_vma_start > large_vma_limit);
-	/* kernel_vma must also be non-zero */
+	WARN_ON(user_vma_start > user_vma_limit);
 	WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-	if (small_vma_start > small_vma_limit ||
-	    large_vma_start > large_vma_limit ||
+	if (user_vma_start > user_vma_limit ||
 	    kernel_vma_start >= kernel_vma_limit) {
 		err = -EINVAL;
 		goto clean_up_share;
 	}
 
-	if (small_vma_start < small_vma_limit) {
+	if (user_vma_start < user_vma_limit) {
 		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 			 gmmu_page_sizes[gmmu_page_size_small] >> 10);
+		if (!gk20a_big_pages_possible(vm, user_vma_start,
+					      user_vma_limit - user_vma_start))
+			vm->big_pages = false;
 
 		err = __nvgpu_buddy_allocator_init(
 			g,
-			&vm->vma[gmmu_page_size_small],
+			vm->vma[gmmu_page_size_small],
 			vm, name,
-			small_vma_start,
-			small_vma_limit - small_vma_start,
+			user_vma_start,
+			user_vma_limit - user_vma_start,
 			SZ_4K,
 			GPU_BALLOC_MAX_ORDER,
 			GPU_ALLOC_GVA_SPACE);
 		if (err)
 			goto clean_up_share;
-	}
-
-	if (large_vma_start < large_vma_limit) {
-		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-			 gmmu_page_sizes[gmmu_page_size_big] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_big],
-			vm, name,
-			large_vma_start,
-			large_vma_limit - large_vma_start,
-			big_page_size,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_small_allocator;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
 	}
 
 	snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
 		 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+				      kernel_vma_limit - kernel_vma_start))
+		vm->big_pages = false;
+
 	/*
 	 * kernel reserved VMA is at the end of the aperture
 	 */
 	err = __nvgpu_buddy_allocator_init(
 		g,
-		&vm->vma[gmmu_page_size_kernel],
+		vm->vma[gmmu_page_size_kernel],
 		vm, name,
 		kernel_vma_start,
 		kernel_vma_limit - kernel_vma_start,
 		SZ_4K,
 		GPU_BALLOC_MAX_ORDER,
 		GPU_ALLOC_GVA_SPACE);
 	if (err)
-		goto clean_up_big_allocator;
+		goto clean_up_user_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -426,12 +412,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 	return 0;
 
-clean_up_big_allocator:
-	if (large_vma_start < large_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-	if (small_vma_start < small_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+	if (user_vma_start < user_vma_limit)
+		nvgpu_alloc_destroy(&vm->user);
 clean_up_share:
 	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
 	msg.handle = vgpu_get_handle(g);
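With the unified VMAs above, vm->vma[gmmu_page_size_small] and
vm->vma[gmmu_page_size_big] both point at vm->user (or at vm->kernel when
there is no user VMA), so an allocation path only needs the page-size index
to pick an allocator. A hypothetical usage sketch, assuming the
__get_pte_size_sketch() helper shown earlier and the nvgpu_alloc() allocator
entry point; it is not code from this patch:

static u64 gva_alloc_sketch(struct vm_gk20a *vm, u64 size)
{
	/* Page size is derived from the length only. */
	u32 pgsz_idx = __get_pte_size_sketch(vm, size);

	/* Small and big indices now share one buddy allocator. */
	struct nvgpu_allocator *vma = vm->vma[pgsz_idx];

	return nvgpu_alloc(vma, size);
}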