author     Alex Waterman <alexw@nvidia.com>    2016-06-24 17:12:24 -0400
committer  Alex Waterman <alexw@nvidia.com>    2016-07-19 14:21:46 -0400
commit     b6569319c772d84087a0a1a6d7146bdcae8e9aab (patch)
tree       16e7bae422279925301d9116b1e7f4d8aa656483 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     f4b77e465648e87b19a7df4bb2a121ac8ac1b851 (diff)
gpu: nvgpu: Support multiple types of allocators
Support multiple types of allocation backends. Currently there is only
one allocator implementation available: a buddy allocator. Buddy
allocators have certain limitations, though. For one, the allocator
requires metadata to be allocated from the kernel's system memory. This
causes a given buddy allocation to potentially sleep on a kmalloc()
call.

This patch has been created so that a new backend can be added which
avoids calling any dynamic system memory management routines.

Bug 1781897

Change-Id: I98d6c8402c049942f13fee69c6901a166f177f65
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1172115
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
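The generic names introduced by this patch (gk20a_alloc(), gk20a_free(),
gk20a_alloc_fixed(), gk20a_alloc_destroy()) front whatever backend a given
allocator was initialized with. A minimal sketch of that dispatch pattern
follows; the struct layout and field names here are illustrative
assumptions, since the real gk20a_allocator definition lives in the
allocator header rather than in this diff:

#include <linux/types.h>

/*
 * Hypothetical sketch of backend dispatch behind the generic allocator
 * front end. Only the gk20a_alloc()/gk20a_free() names come from this
 * patch; the ops-table layout below is assumed for illustration.
 */
struct gk20a_allocator;

struct gk20a_alloc_ops {
        u64  (*alloc)(struct gk20a_allocator *a, u64 len);
        void (*free)(struct gk20a_allocator *a, u64 addr);
};

struct gk20a_allocator {
        const struct gk20a_alloc_ops *ops;  /* active backend, e.g. buddy */
        void *priv;                         /* backend-private metadata */
};

/* The generic entry points forward to whichever backend was set up. */
static inline u64 gk20a_alloc(struct gk20a_allocator *a, u64 len)
{
        return a->ops->alloc(a, len);
}

static inline void gk20a_free(struct gk20a_allocator *a, u64 addr)
{
        a->ops->free(a, addr);
}

With this split, a backend that pre-reserves all of its metadata up front
can implement ops->alloc() without ever touching kmalloc(), which is the
property the commit message is after.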
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  112
1 file changed, 57 insertions(+), 55 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 673aafda..ffc695f5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1331,7 +1331,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 	gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
 			vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
-	offset = gk20a_balloc(vma, size);
+	offset = gk20a_alloc(vma, size);
 	if (!offset) {
 		gk20a_err(dev_from_vm(vm),
 			  "%s oom: sz=0x%llx", vma->name, size);
@@ -1350,7 +1350,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 
 	gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
 			vma->name, offset, size);
-	gk20a_bfree(vma, offset);
+	gk20a_free(vma, offset);
 
 	return 0;
 }
@@ -3407,12 +3407,12 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	 *
 	 * !!! TODO: cleanup.
 	 */
-	sema_sea->gpu_va = gk20a_balloc_fixed(&vm->vma[gmmu_page_size_kernel],
+	sema_sea->gpu_va = gk20a_alloc_fixed(&vm->vma[gmmu_page_size_kernel],
 			vm->va_limit -
 			mm->channel.kernel_size,
 			512 * PAGE_SIZE);
 	if (!sema_sea->gpu_va) {
-		gk20a_bfree(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
+		gk20a_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
 		gk20a_vm_put(vm);
 		return -ENOMEM;
 	}
@@ -3420,7 +3420,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
 	if (err) {
 		gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
-		gk20a_bfree(&vm->vma[gmmu_page_size_small],
+		gk20a_free(&vm->vma[gmmu_page_size_small],
 				vm->sema_pool->gpu_va);
 		gk20a_vm_put(vm);
 	}
@@ -3542,13 +3542,13 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	snprintf(alloc_name, sizeof(alloc_name),
 		 "gk20a_%s-fixed", name);
 
-	err = __gk20a_allocator_init(&vm->fixed,
+	err = __gk20a_buddy_allocator_init(&vm->fixed,
 			vm, alloc_name,
 			small_vma_start,
 			g->separate_fixed_allocs,
 			SZ_4K,
 			GPU_BALLOC_MAX_ORDER,
 			GPU_BALLOC_GVA_SPACE);
 	if (err)
 		goto clean_up_ptes;
 
@@ -3559,13 +3559,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (small_vma_start < small_vma_limit) {
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
 			 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-				vm, alloc_name,
-				small_vma_start,
-				small_vma_limit - small_vma_start,
-				SZ_4K,
-				GPU_BALLOC_MAX_ORDER,
-				GPU_BALLOC_GVA_SPACE);
+		err = __gk20a_buddy_allocator_init(
+			&vm->vma[gmmu_page_size_small],
+			vm, alloc_name,
+			small_vma_start,
+			small_vma_limit - small_vma_start,
+			SZ_4K,
+			GPU_BALLOC_MAX_ORDER,
+			GPU_BALLOC_GVA_SPACE);
 		if (err)
 			goto clean_up_ptes;
 	}
@@ -3573,13 +3574,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (large_vma_start < large_vma_limit) {
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
 			 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-				vm, alloc_name,
-				large_vma_start,
-				large_vma_limit - large_vma_start,
-				big_page_size,
-				GPU_BALLOC_MAX_ORDER,
-				GPU_BALLOC_GVA_SPACE);
+		err = __gk20a_buddy_allocator_init(
+			&vm->vma[gmmu_page_size_big],
+			vm, alloc_name,
+			large_vma_start,
+			large_vma_limit - large_vma_start,
+			big_page_size,
+			GPU_BALLOC_MAX_ORDER,
+			GPU_BALLOC_GVA_SPACE);
 		if (err)
 			goto clean_up_small_allocator;
 	}
@@ -3589,13 +3591,13 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	/*
 	 * kernel reserved VMA is at the end of the aperture
 	 */
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel],
+	err = __gk20a_buddy_allocator_init(&vm->vma[gmmu_page_size_kernel],
 			vm, alloc_name,
 			kernel_vma_start,
 			kernel_vma_limit - kernel_vma_start,
 			SZ_4K,
 			GPU_BALLOC_MAX_ORDER,
 			GPU_BALLOC_GVA_SPACE);
 	if (err)
 		goto clean_up_big_allocator;
 
@@ -3620,10 +3622,10 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 clean_up_big_allocator:
 	if (large_vma_start < large_vma_limit)
-		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+		gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]);
 clean_up_small_allocator:
 	if (small_vma_start < small_vma_limit)
-		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+		gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);
 clean_up_ptes:
 	free_gmmu_pages(vm, &vm->pdb);
 clean_up_pdes:
@@ -3730,15 +3732,15 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 
 	vma = &vm->vma[pgsz_idx];
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
-		if (vm->fixed.init)
+		if (gk20a_alloc_initialized(&vm->fixed))
 			vma = &vm->fixed;
-		vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
+		vaddr_start = gk20a_alloc_fixed(vma, args->o_a.offset,
 				(u64)args->pages *
 				(u64)args->page_size);
 	} else {
-		vaddr_start = gk20a_balloc(vma,
+		vaddr_start = gk20a_alloc(vma,
 				(u64)args->pages *
 				(u64)args->page_size);
 	}
 
 	if (!vaddr_start) {
@@ -3772,7 +3774,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 				APERTURE_INVALID);
 	if (!map_offset) {
 		mutex_unlock(&vm->update_gmmu_lock);
-		gk20a_bfree(vma, vaddr_start);
+		gk20a_free(vma, vaddr_start);
 		kfree(va_node);
 		goto clean_up;
 	}
@@ -3807,11 +3809,11 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
 			gmmu_page_size_big : gmmu_page_size_small;
 
-	if (vm->fixed.init)
+	if (gk20a_alloc_initialized(&vm->fixed))
 		vma = &vm->fixed;
 	else
 		vma = &vm->vma[pgsz_idx];
-	gk20a_bfree(vma, args->offset);
+	gk20a_free(vma, args->offset);
 
 	mutex_lock(&vm->update_gmmu_lock);
 	va_node = addr_to_reservation(vm, args->offset);
@@ -3995,13 +3997,13 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]);
-	if (vm->vma[gmmu_page_size_big].init)
-		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
-	if (vm->vma[gmmu_page_size_small].init)
-		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
-	if (vm->fixed.init)
-		gk20a_allocator_destroy(&vm->fixed);
+	gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
+	if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big]))
+		gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]);
+	if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small]))
+		gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+	if (gk20a_alloc_initialized(&vm->fixed))
+		gk20a_alloc_destroy(&vm->fixed);
 
 	gk20a_vm_free_entries(vm, &vm->pdb, 0);
 }
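Read in sequence, the hunks above trace the full lifecycle of one VMA
allocator under the renamed API. A condensed sketch, assembled only from
the calls visible in this diff (error handling trimmed):

        /* 1. Back the VMA with the buddy allocator implementation. */
        err = __gk20a_buddy_allocator_init(&vm->vma[gmmu_page_size_small],
                        vm, alloc_name,
                        small_vma_start,
                        small_vma_limit - small_vma_start,
                        SZ_4K,
                        GPU_BALLOC_MAX_ORDER,
                        GPU_BALLOC_GVA_SPACE);

        /* 2. Carve GPU VA in and out through the generic front end. */
        offset = gk20a_alloc(&vm->vma[gmmu_page_size_small], size);
        if (offset)
                gk20a_free(&vm->vma[gmmu_page_size_small], offset);

        /* 3. Tear down, guarding on whether init actually ran. */
        if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small]))
                gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);

Note that only the init call still names the buddy implementation
directly; the alloc/free/destroy paths go through the backend-agnostic
names, which is what lets a future non-sleeping allocator slot in without
touching these call sites.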