author    Alex Waterman <alexw@nvidia.com>  2015-06-17 13:31:08 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-01-31 19:23:07 -0500
commit    d630f1d99f60b1c2ec87506a2738bac4d1895b07
tree      5b9cad58f585424a64e7b675d503a87bbcada254  /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent    793791ebb7ddbb34f0aaf3e300b24ed24aa76661
gpu: nvgpu: Unify the small and large page address spaces
The basic structure of this patch is to make the small page allocator and the
large page allocator into pointers (where they used to be just structs). Each
of those pointers is then assigned to the same actual allocator, since the
buddy allocator has supported mixed page sizes since its inception. For the
rest of the driver, some changes had to be made in order to actually support
mixed pages in a single address space.

1. Unifying the allocation page size determination

   Since the allocation and map operations happen at distinct times, both
   mapping and allocation of GVA space must agree on page size. This is
   because the allocator has to separate allocations into distinct PDEs to
   avoid the necessity of supporting mixed PDEs. To this end a function
   __get_pte_size() was introduced, which is used both by the balloc code
   and the core GPU MM code. It determines page size based only on the
   length of the mapping/allocation.

2. Fixed address allocation + page size

   Similar to regular mappings/GVA allocations, fixed address mapping page
   size determination had to be modified. In the past the address of the
   mapping determined the page size, since the address space was split by
   address (low addresses were small pages, high addresses large pages).
   Since that is no longer the case, the page size field in the reserve
   memory ioctl is now honored by the mapping code. When, for instance, CUDA
   makes a memory reservation it specifies small or large pages. When CUDA
   then requests mappings within that address range, the page size is looked
   up in the reserved memory struct.

   Fixed address reservations were also modified to always allocate at PDE
   granularity (64M or 128M depending on the large page size). This prevents
   non-fixed allocations from ending up in the same PDE and causing kernel
   panics or GMMU faults.

3. The rest...

   The rest of the changes are just by-products of the above. Lots of places
   required minor updates to use a pointer to the GVA allocator struct
   instead of the struct itself.

Lastly, this change is not truly complete. More work remains to be done in
order to fully remove the notion that there ever were separate address spaces
for different page sizes. Basically, what remains after this patch is cleanup
and proper documentation.

Bug 1396644
Bug 1729947

Change-Id: If51ab396a37ba16c69e434adb47edeef083dce57
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265300
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
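To make the length-based page-size rule concrete, below is a minimal standalone
sketch. It is not the driver's actual __get_pte_size() (whose definition lives
outside this file); it only assumes the behavior described above: a mapping gets
big pages when its base and length are big-page aligned and it is at least one
big page long, otherwise it falls back to 4K small pages. The PDE-span
arithmetic matches gk20a_big_pages_possible() in the diff below, where a 64K
big page implies a 64MB PDE span (64K << 10) and a 128K big page implies 128MB.

    /* Sketch only -- illustrative, not the nvgpu implementation. */
    #include <stdint.h>
    #include <stdio.h>

    enum pgsz_idx { PGSZ_SMALL = 0, PGSZ_BIG = 1 };

    /* Pick a PTE size purely from the mapping's base and length. */
    static enum pgsz_idx sketch_get_pte_size(uint64_t big_page_size,
                                             uint64_t base, uint64_t size)
    {
            uint64_t mask = big_page_size - 1;

            /* Too short, or not big-page aligned: fall back to 4K pages. */
            if (size < big_page_size || (base & mask) || (size & mask))
                    return PGSZ_SMALL;
            return PGSZ_BIG;
    }

    int main(void)
    {
            uint64_t big = 64 * 1024;       /* 64K big pages */

            /* 2MB aligned mapping -> big pages; 12K mapping -> small pages. */
            printf("%d\n", sketch_get_pte_size(big, 0x100000, 2 << 20));
            printf("%d\n", sketch_get_pte_size(big, 0x100000, 12 << 10));

            /* PDE span used for fixed reservations: 64K << 10 = 64MB. */
            printf("PDE span = %llu MB\n",
                   (unsigned long long)((big << 10) >> 20));
            return 0;
    }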
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 282
1 file changed, 168 insertions(+), 114 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cdbaef79..83bbcb54 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1001,7 +1001,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
         mutex_init(&mm->l2_op_lock);
 
         /*TBD: make channel vm size configurable */
-        mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE;
+        mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
+                NV_MM_DEFAULT_KERNEL_SIZE;
         mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
 
         gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
@@ -1626,7 +1627,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
                      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
-        struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+        struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx];
         u64 offset;
         u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
@@ -1663,7 +1664,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
                      u64 offset, u64 size,
                      enum gmmu_pgsz_gk20a pgsz_idx)
 {
-        struct nvgpu_allocator *vma = &vm->vma[pgsz_idx];
+        struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
 
         gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
                        vma->name, offset, size);
@@ -1790,13 +1791,7 @@ struct buffer_attrs {
 static void gmmu_select_page_size(struct vm_gk20a *vm,
                                   struct buffer_attrs *bfr)
 {
-        int i;
-        /* choose the biggest first (top->bottom) */
-        for (i = gmmu_page_size_kernel - 1; i >= 0; i--)
-                if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
-                        bfr->pgsz_idx = i;
-                        break;
-                }
+        bfr->pgsz_idx = __get_pte_size(vm, 0, bfr->size);
 }
 
 static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
@@ -2497,9 +2492,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
          * the alignment determined by gmmu_select_page_size().
          */
         if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-                int pgsz_idx =
-                        __nv_gmmu_va_is_big_page_region(vm, offset_align) ?
-                        gmmu_page_size_big : gmmu_page_size_small;
+                int pgsz_idx = __get_pte_size(vm, offset_align, mapping_size);
                 if (pgsz_idx > bfr.pgsz_idx) {
                         gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
                                   offset_align, bfr.pgsz_idx, pgsz_idx);
@@ -3149,7 +3142,7 @@ static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
         u64 addr = 0;
 
         if (at)
-                addr = nvgpu_alloc_fixed(allocator, at, size);
+                addr = nvgpu_alloc_fixed(allocator, at, size, 0);
         else
                 addr = nvgpu_alloc(allocator, size);
 
@@ -4260,12 +4253,13 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
          *
          * !!! TODO: cleanup.
          */
-        sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel],
+        sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
                                              vm->va_limit -
                                              mm->channel.kernel_size,
-                                             512 * PAGE_SIZE);
+                                             512 * PAGE_SIZE,
+                                             SZ_4K);
         if (!sema_sea->gpu_va) {
-                nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
+                nvgpu_free(&vm->kernel, sema_sea->gpu_va);
                 gk20a_vm_put(vm);
                 return -ENOMEM;
         }
@@ -4273,14 +4267,78 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
         err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
         if (err) {
                 gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
-                nvgpu_free(&vm->vma[gmmu_page_size_small],
+                nvgpu_free(vm->vma[gmmu_page_size_small],
                            vm->sema_pool->gpu_va);
                 gk20a_vm_put(vm);
         }
 
         return 0;
 }
 
+/*
+ * Determine if the passed address space can support big pages or not.
+ */
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
+{
+        u64 mask = ((u64)vm->big_page_size << 10) - 1;
+
+        if (base & mask || size & mask)
+                return 0;
+        return 1;
+}
+
+/*
+ * Attempt to find a reserved memory area to determine PTE size for the passed
+ * mapping. If no reserved area can be found use small pages but drop a warning.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+                                              u64 base, u64 size)
+{
+        struct vm_reserved_va_node *node;
+
+        node = addr_to_reservation(vm, base);
+        if (!node)
+                return gmmu_page_size_small;
+
+        return node->pgsz_idx;
+}
+
+/**
+ * gk20a_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *             the address space.
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - Ignored. Will be set based on other passed params.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefor what ever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always persent at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
 int gk20a_init_vm(struct mm_gk20a *mm,
                   struct vm_gk20a *vm,
                   u32 big_page_size,
@@ -4293,20 +4351,23 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 {
         int err, i;
         char alloc_name[32];
-        u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-                kernel_vma_start, kernel_vma_limit;
+        u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
         u32 pde_lo, pde_hi;
         struct gk20a *g = mm->g;
 
         /* note: this must match gmmu_pgsz_gk20a enum */
         u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
 
-        WARN_ON(kernel_reserved + low_hole > aperture_size);
-        if (kernel_reserved > aperture_size)
+        if (WARN_ON(kernel_reserved + low_hole > aperture_size))
                 return -ENOMEM;
 
         vm->mm = mm;
 
+        /* Set up vma pointers. */
+        vm->vma[0] = &vm->user;
+        vm->vma[1] = &vm->user;
+        vm->vma[2] = &vm->kernel;
+
         vm->va_start = low_hole;
         vm->va_limit = aperture_size;
         vm->big_pages = big_pages;
@@ -4321,10 +4382,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
         gk20a_dbg_info("small page-size (%dKB)",
                         vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-
-        gk20a_dbg_info("big page-size (%dKB)",
-                        vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-
+        gk20a_dbg_info("big page-size (%dKB) (%s)\n",
+                        vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, name);
         gk20a_dbg_info("kernel page-size (%dKB)",
                         vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
 
@@ -4348,38 +4407,27 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                 goto clean_up_pdes;
 
         /* setup vma limits */
-        small_vma_start = low_hole;
-
-        if (big_pages) {
-                /* First 16GB of the address space goes towards small
-                 * pages. What ever remains is allocated to large
-                 * pages. */
-                small_vma_limit = __nv_gmmu_va_small_page_limit();
-                large_vma_start = small_vma_limit;
-                large_vma_limit = vm->va_limit - kernel_reserved;
-        } else {
-                small_vma_limit = vm->va_limit - kernel_reserved;
-                large_vma_start = 0;
-                large_vma_limit = 0;
-        }
+        user_vma_start = low_hole;
+        user_vma_limit = vm->va_limit - kernel_reserved;
 
         kernel_vma_start = vm->va_limit - kernel_reserved;
         kernel_vma_limit = vm->va_limit;
 
         gk20a_dbg_info(
-                "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-                small_vma_start, small_vma_limit,
-                large_vma_start, large_vma_limit,
+                "user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+                user_vma_start, user_vma_limit,
                 kernel_vma_start, kernel_vma_limit);
 
-        /* check that starts do not exceed limits */
-        WARN_ON(small_vma_start > small_vma_limit);
-        WARN_ON(large_vma_start > large_vma_limit);
-        /* kernel_vma must also be non-zero */
+        WARN_ON(user_vma_start > user_vma_limit);
         WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-        if (small_vma_start > small_vma_limit ||
-            large_vma_start > large_vma_limit ||
+        /*
+         * A "user" area only makes sense for the GVA spaces. For VMs where
+         * there is no "user" area user_vma_start will be equal to
+         * user_vma_limit (i.e a 0 sized space). In such a situation the kernel
+         * area must be non-zero in length.
+         */
+        if (user_vma_start > user_vma_limit ||
             kernel_vma_start >= kernel_vma_limit) {
                 err = -EINVAL;
                 goto clean_up_pdes;
@@ -4389,8 +4437,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
          * Attempt to make a separate VM for fixed allocations.
          */
         if (g->separate_fixed_allocs &&
-            small_vma_start < small_vma_limit) {
-                if (g->separate_fixed_allocs >= small_vma_limit)
+            user_vma_start < user_vma_limit) {
+                if (g->separate_fixed_allocs >= user_vma_limit)
                         goto clean_up_pdes;
 
                 snprintf(alloc_name, sizeof(alloc_name),
@@ -4398,7 +4446,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
                 err = __nvgpu_buddy_allocator_init(g, &vm->fixed,
                                                    vm, alloc_name,
-                                                   small_vma_start,
+                                                   user_vma_start,
                                                    g->separate_fixed_allocs,
                                                    SZ_4K,
                                                    GPU_BALLOC_MAX_ORDER,
@@ -4407,47 +4455,41 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                         goto clean_up_ptes;
 
                 /* Make sure to update the user vma size. */
-                small_vma_start = g->separate_fixed_allocs;
-        }
-
-        if (small_vma_start < small_vma_limit) {
-                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
-                         vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-                err = __nvgpu_buddy_allocator_init(
-                        g,
-                        &vm->vma[gmmu_page_size_small],
-                        vm, alloc_name,
-                        small_vma_start,
-                        small_vma_limit - small_vma_start,
-                        SZ_4K,
-                        GPU_BALLOC_MAX_ORDER,
-                        GPU_ALLOC_GVA_SPACE);
-                if (err)
-                        goto clean_up_ptes;
+                user_vma_start = g->separate_fixed_allocs;
         }
 
-        if (large_vma_start < large_vma_limit) {
-                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
-                         name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-                err = __nvgpu_buddy_allocator_init(
-                        g,
-                        &vm->vma[gmmu_page_size_big],
-                        vm, alloc_name,
-                        large_vma_start,
-                        large_vma_limit - large_vma_start,
-                        big_page_size,
-                        GPU_BALLOC_MAX_ORDER,
-                        GPU_ALLOC_GVA_SPACE);
+        if (user_vma_start < user_vma_limit) {
+                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
+                if (!gk20a_big_pages_possible(vm, user_vma_start,
+                                              user_vma_limit - user_vma_start))
+                        vm->big_pages = false;
+
+                err = __nvgpu_buddy_allocator_init(g, &vm->user,
+                                                   vm, alloc_name,
+                                                   user_vma_start,
+                                                   user_vma_limit -
+                                                   user_vma_start,
+                                                   SZ_4K,
+                                                   GPU_BALLOC_MAX_ORDER,
+                                                   GPU_ALLOC_GVA_SPACE);
                 if (err)
-                        goto clean_up_small_allocator;
+                        goto clean_up_ptes;
+        } else {
+                /*
+                 * Make these allocator pointers point to the kernel allocator
+                 * since we still use the legacy notion of page size to choose
+                 * the allocator.
+                 */
+                vm->vma[0] = &vm->kernel;
+                vm->vma[1] = &vm->kernel;
         }
 
-        snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys",
-                 name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
-        /*
-         * kernel reserved VMA is at the end of the aperture
-         */
-        err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel],
+        snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
+        if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+                                      kernel_vma_limit - kernel_vma_start))
+                vm->big_pages = false;
+
+        err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
                                            vm, alloc_name,
                                            kernel_vma_start,
                                            kernel_vma_limit - kernel_vma_start,
@@ -4455,7 +4497,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                                            GPU_BALLOC_MAX_ORDER,
                                            GPU_ALLOC_GVA_SPACE);
         if (err)
-                goto clean_up_big_allocator;
+                goto clean_up_user_allocator;
 
         vm->mapped_buffers = RB_ROOT;
 
@@ -4471,17 +4513,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
         if (vm->va_limit > SZ_4G) {
                 err = gk20a_init_sema_pool(vm);
                 if (err)
-                        goto clean_up_big_allocator;
+                        goto clean_up_user_allocator;
         }
 
         return 0;
 
-clean_up_big_allocator:
-        if (large_vma_start < large_vma_limit)
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-        if (small_vma_start < small_vma_limit)
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+        if (user_vma_start < user_vma_limit)
+                nvgpu_alloc_destroy(&vm->user);
 clean_up_ptes:
         free_gmmu_pages(vm, &vm->pdb);
 clean_up_pdes:
@@ -4523,9 +4562,10 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
         vm->as_share = as_share;
         vm->enable_ctag = true;
 
-        snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
+        snprintf(name, sizeof(name), "as_%d", as_share->id);
 
-        err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
+        err = gk20a_init_vm(mm, vm, big_page_size,
+                            big_page_size << 10,
                             mm->channel.kernel_size,
                             mm->channel.user_size + mm->channel.kernel_size,
                             !mm->disable_bigpage, userspace_managed, name);
@@ -4586,13 +4626,14 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                 goto clean_up;
         }
 
-        vma = &vm->vma[pgsz_idx];
+        vma = vm->vma[pgsz_idx];
         if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
                 if (nvgpu_alloc_initialized(&vm->fixed))
                         vma = &vm->fixed;
                 vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset,
                                                 (u64)args->pages *
-                                                (u64)args->page_size);
+                                                (u64)args->page_size,
+                                                args->page_size);
         } else {
                 vaddr_start = nvgpu_alloc(vma,
                                           (u64)args->pages *
@@ -4662,13 +4703,13 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                    args->pages, args->offset);
 
         /* determine pagesz idx */
-        pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
-                        gmmu_page_size_big : gmmu_page_size_small;
+        pgsz_idx = __get_pte_size(vm, args->offset,
+                                  args->page_size * args->pages);
 
         if (nvgpu_alloc_initialized(&vm->fixed))
                 vma = &vm->fixed;
         else
-                vma = &vm->vma[pgsz_idx];
+                vma = vm->vma[pgsz_idx];
         nvgpu_free(vma, args->offset);
 
         mutex_lock(&vm->update_gmmu_lock);
@@ -4853,11 +4894,10 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-        nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-        if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-        if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-                nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+        if (nvgpu_alloc_initialized(&vm->kernel))
+                nvgpu_alloc_destroy(&vm->kernel);
+        if (nvgpu_alloc_initialized(&vm->user))
+                nvgpu_alloc_destroy(&vm->user);
         if (nvgpu_alloc_initialized(&vm->fixed))
                 nvgpu_alloc_destroy(&vm->fixed);
 
@@ -4908,9 +4948,13 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 
         mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
         gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-        gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
-                      mm->bar1.aperture_size - SZ_4K,
-                      mm->bar1.aperture_size, false, false, "bar1");
+        gk20a_init_vm(mm, vm,
+                      big_page_size,
+                      SZ_4K,                              /* Low hole */
+                      mm->bar1.aperture_size - SZ_4K,     /* Kernel reserved. */
+                      mm->bar1.aperture_size,
+                      true, false,
+                      "bar1");
 
         err = gk20a_alloc_inst_block(g, inst_block);
         if (err)
@@ -4932,13 +4976,23 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
         struct gk20a *g = gk20a_from_mm(mm);
         struct mem_desc *inst_block = &mm->pmu.inst_block;
         u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+        u32 low_hole, aperture_size;
+
+        /*
+         * No user region - so we will pass that as zero sized.
+         */
+        low_hole = SZ_4K * 16;
+        aperture_size = GK20A_PMU_VA_SIZE * 2;
 
         mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
         gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
         gk20a_init_vm(mm, vm, big_page_size,
-                      SZ_4K * 16, GK20A_PMU_VA_SIZE,
-                      GK20A_PMU_VA_SIZE * 2, false, false,
+                      low_hole,
+                      aperture_size - low_hole,
+                      aperture_size,
+                      true,
+                      false,
                       "system");
 
         err = gk20a_alloc_inst_block(g, inst_block);