Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  282
1 file changed, 168 insertions, 114 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cdbaef79..83bbcb54 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1001,7 +1001,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 	mutex_init(&mm->l2_op_lock);
 
 	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE;
+	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
+		NV_MM_DEFAULT_KERNEL_SIZE;
 	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
 
 	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
@@ -1626,7 +1627,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
-	struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+	struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx];
 	u64 offset;
 	u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
@@ -1663,7 +1664,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 		     u64 offset, u64 size,
 		     enum gmmu_pgsz_gk20a pgsz_idx)
 {
-	struct nvgpu_allocator *vma = &vm->vma[pgsz_idx];
+	struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
 
 	gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
 		       vma->name, offset, size);
@@ -1790,13 +1791,7 @@ struct buffer_attrs {
 static void gmmu_select_page_size(struct vm_gk20a *vm,
 				  struct buffer_attrs *bfr)
 {
-	int i;
-	/* choose the biggest first (top->bottom) */
-	for (i = gmmu_page_size_kernel - 1; i >= 0; i--)
-		if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
-			bfr->pgsz_idx = i;
-			break;
-		}
+	bfr->pgsz_idx = __get_pte_size(vm, 0, bfr->size);
 }
 
 static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
@@ -2497,9 +2492,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	 * the alignment determined by gmmu_select_page_size().
 	 */
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		int pgsz_idx =
-			__nv_gmmu_va_is_big_page_region(vm, offset_align) ?
-			gmmu_page_size_big : gmmu_page_size_small;
+		int pgsz_idx = __get_pte_size(vm, offset_align, mapping_size);
 		if (pgsz_idx > bfr.pgsz_idx) {
 			gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
 				  offset_align, bfr.pgsz_idx, pgsz_idx);
@@ -3149,7 +3142,7 @@ static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
 	u64 addr = 0;
 
 	if (at)
-		addr = nvgpu_alloc_fixed(allocator, at, size);
+		addr = nvgpu_alloc_fixed(allocator, at, size, 0);
 	else
 		addr = nvgpu_alloc(allocator, size);
 
@@ -4260,12 +4253,13 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	 *
 	 * !!! TODO: cleanup.
 	 */
-	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel],
+	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
 					     vm->va_limit -
 					     mm->channel.kernel_size,
-					     512 * PAGE_SIZE);
+					     512 * PAGE_SIZE,
+					     SZ_4K);
 	if (!sema_sea->gpu_va) {
-		nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
+		nvgpu_free(&vm->kernel, sema_sea->gpu_va);
 		gk20a_vm_put(vm);
 		return -ENOMEM;
 	}
@@ -4273,14 +4267,78 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
 	if (err) {
 		gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
-		nvgpu_free(&vm->vma[gmmu_page_size_small],
+		nvgpu_free(vm->vma[gmmu_page_size_small],
 			   vm->sema_pool->gpu_va);
 		gk20a_vm_put(vm);
 	}
 
 	return 0;
 }
 
+/*
+ * Determine if the passed address space can support big pages or not.
+ */
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
+{
+	u64 mask = ((u64)vm->big_page_size << 10) - 1;
+
+	if (base & mask || size & mask)
+		return 0;
+	return 1;
+}
+
+/*
+ * Attempt to find a reserved memory area to determine PTE size for the passed
+ * mapping. If no reserved area can be found, use small pages but drop a warning.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size)
+{
+	struct vm_reserved_va_node *node;
+
+	node = addr_to_reservation(vm, base);
+	if (!node)
+		return gmmu_page_size_small;
+
+	return node->pgsz_idx;
+}
+
+/**
+ * gk20a_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *	       the address space).
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - Ignored. Will be set based on other passed params.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefore whatever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always present at the top of the memory space, and the @low_hole is always
+ * at the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc.), so
+ * in such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
 int gk20a_init_vm(struct mm_gk20a *mm,
 		  struct vm_gk20a *vm,
 		  u32 big_page_size,
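The layout described in the new gk20a_init_vm() comment reduces to simple arithmetic, which the function performs a few hunks below. The following standalone sketch is illustration only, not part of the patch; the aperture, low-hole, and kernel-reserved values are made-up stand-ins for the real NV_MM_DEFAULT_* sizes.

/* Illustration only -- mirrors the bounds computed by gk20a_init_vm(). */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t aperture_size   = 1ULL << 37;  /* hypothetical 128GB aperture  */
	uint64_t low_hole        = 64ULL << 20; /* e.g. big_page_size << 10     */
	uint64_t kernel_reserved = 1ULL << 32;  /* e.g. mm->channel.kernel_size */

	uint64_t user_vma_start   = low_hole;
	uint64_t user_vma_limit   = aperture_size - kernel_reserved;
	uint64_t kernel_vma_start = aperture_size - kernel_reserved;
	uint64_t kernel_vma_limit = aperture_size;

	printf("user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
	       (unsigned long long)user_vma_start,
	       (unsigned long long)user_vma_limit,
	       (unsigned long long)kernel_vma_start,
	       (unsigned long long)kernel_vma_limit);

	/*
	 * For a VM with no user section (bar1, the system/pmu VM), low_hole +
	 * kernel_reserved == aperture_size, so user_vma_start == user_vma_limit
	 * and only the kernel allocator gets created.
	 */
	return 0;
}

The new gk20a_big_pages_possible() helper simply checks that a region's base and size are both multiples of vm->big_page_size << 10; when a VMA fails that check, the patch clears vm->big_pages and the space falls back to small pages.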
@@ -4293,20 +4351,23 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 {
 	int err, i;
 	char alloc_name[32];
-	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-		kernel_vma_start, kernel_vma_limit;
+	u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
 	u32 pde_lo, pde_hi;
 	struct gk20a *g = mm->g;
 
 	/* note: this must match gmmu_pgsz_gk20a enum */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
 
-	WARN_ON(kernel_reserved + low_hole > aperture_size);
-	if (kernel_reserved > aperture_size)
+	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
 		return -ENOMEM;
 
 	vm->mm = mm;
 
+	/* Set up vma pointers. */
+	vm->vma[0] = &vm->user;
+	vm->vma[1] = &vm->user;
+	vm->vma[2] = &vm->kernel;
+
 	vm->va_start = low_hole;
 	vm->va_limit = aperture_size;
 	vm->big_pages = big_pages;
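With vm->vma[] now holding pointers rather than embedded allocators, both the small- and big-page indices can alias the single user allocator, and later in this diff they are re-pointed at the kernel allocator when no user area exists. A minimal sketch of that aliasing pattern follows; the types are simplified stand-ins, not the real nvgpu structures.

/* Sketch of the allocator aliasing; simplified stand-ins, not nvgpu types. */
#include <stdio.h>
#include <stdbool.h>

struct allocator { const char *name; };

struct vm {
	struct allocator user;
	struct allocator kernel;
	struct allocator *vma[3];	/* indexed by the gmmu page-size enum */
};

static void setup_vma_pointers(struct vm *vm, bool has_user_area)
{
	/* Small- and big-page indices share the one user allocator... */
	vm->vma[0] = &vm->user;
	vm->vma[1] = &vm->user;
	vm->vma[2] = &vm->kernel;

	/* ...unless there is no user area, in which case everything is kernel. */
	if (!has_user_area) {
		vm->vma[0] = &vm->kernel;
		vm->vma[1] = &vm->kernel;
	}
}

int main(void)
{
	struct vm vm = { .user = { "user" }, .kernel = { "kernel" } };

	setup_vma_pointers(&vm, true);
	printf("big-page allocations come from the %s allocator\n",
	       vm.vma[1]->name);

	setup_vma_pointers(&vm, false);	/* e.g. a bar1-style VM */
	printf("big-page allocations come from the %s allocator\n",
	       vm.vma[1]->name);
	return 0;
}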
@@ -4321,10 +4382,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 	gk20a_dbg_info("small page-size (%dKB)",
 			vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-
-	gk20a_dbg_info("big page-size (%dKB)",
-			vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-
+	gk20a_dbg_info("big page-size (%dKB) (%s)\n",
+			vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, name);
 	gk20a_dbg_info("kernel page-size (%dKB)",
 			vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
 
@@ -4348,38 +4407,27 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		goto clean_up_pdes;
 
 	/* setup vma limits */
-	small_vma_start = low_hole;
-
-	if (big_pages) {
-		/* First 16GB of the address space goes towards small
-		 * pages. What ever remains is allocated to large
-		 * pages. */
-		small_vma_limit = __nv_gmmu_va_small_page_limit();
-		large_vma_start = small_vma_limit;
-		large_vma_limit = vm->va_limit - kernel_reserved;
-	} else {
-		small_vma_limit = vm->va_limit - kernel_reserved;
-		large_vma_start = 0;
-		large_vma_limit = 0;
-	}
+	user_vma_start = low_hole;
+	user_vma_limit = vm->va_limit - kernel_reserved;
 
 	kernel_vma_start = vm->va_limit - kernel_reserved;
 	kernel_vma_limit = vm->va_limit;
 
 	gk20a_dbg_info(
-		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-		small_vma_start, small_vma_limit,
-		large_vma_start, large_vma_limit,
+		"user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		user_vma_start, user_vma_limit,
 		kernel_vma_start, kernel_vma_limit);
 
-	/* check that starts do not exceed limits */
-	WARN_ON(small_vma_start > small_vma_limit);
-	WARN_ON(large_vma_start > large_vma_limit);
-	/* kernel_vma must also be non-zero */
+	WARN_ON(user_vma_start > user_vma_limit);
 	WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-	if (small_vma_start > small_vma_limit ||
-	    large_vma_start > large_vma_limit ||
+	/*
+	 * A "user" area only makes sense for the GVA spaces. For VMs where
+	 * there is no "user" area, user_vma_start will be equal to
+	 * user_vma_limit (i.e. a 0 sized space). In such a situation the
+	 * kernel area must be non-zero in length.
+	 */
+	if (user_vma_start > user_vma_limit ||
 	    kernel_vma_start >= kernel_vma_limit) {
 		err = -EINVAL;
 		goto clean_up_pdes;
@@ -4389,8 +4437,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	 * Attempt to make a separate VM for fixed allocations.
 	 */
 	if (g->separate_fixed_allocs &&
-	    small_vma_start < small_vma_limit) {
-		if (g->separate_fixed_allocs >= small_vma_limit)
+	    user_vma_start < user_vma_limit) {
+		if (g->separate_fixed_allocs >= user_vma_limit)
 			goto clean_up_pdes;
 
 		snprintf(alloc_name, sizeof(alloc_name),
@@ -4398,7 +4446,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 		err = __nvgpu_buddy_allocator_init(g, &vm->fixed,
 						   vm, alloc_name,
-						   small_vma_start,
+						   user_vma_start,
 						   g->separate_fixed_allocs,
 						   SZ_4K,
 						   GPU_BALLOC_MAX_ORDER,
@@ -4407,47 +4455,41 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 			goto clean_up_ptes;
 
 		/* Make sure to update the user vma size. */
-		small_vma_start = g->separate_fixed_allocs;
-	}
-
-	if (small_vma_start < small_vma_limit) {
-		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
-			 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_small],
-			vm, alloc_name,
-			small_vma_start,
-			small_vma_limit - small_vma_start,
-			SZ_4K,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_ptes;
+		user_vma_start = g->separate_fixed_allocs;
 	}
 
-	if (large_vma_start < large_vma_limit) {
-		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
-			 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_big],
-			vm, alloc_name,
-			large_vma_start,
-			large_vma_limit - large_vma_start,
-			big_page_size,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
+	if (user_vma_start < user_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
+		if (!gk20a_big_pages_possible(vm, user_vma_start,
+					      user_vma_limit - user_vma_start))
+			vm->big_pages = false;
+
+		err = __nvgpu_buddy_allocator_init(g, &vm->user,
+						   vm, alloc_name,
+						   user_vma_start,
+						   user_vma_limit -
+						   user_vma_start,
+						   SZ_4K,
+						   GPU_BALLOC_MAX_ORDER,
+						   GPU_ALLOC_GVA_SPACE);
 		if (err)
-			goto clean_up_small_allocator;
+			goto clean_up_ptes;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
 	}
 
-	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys",
-		 name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
-	/*
-	 * kernel reserved VMA is at the end of the aperture
-	 */
-	err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel],
+	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
+	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+				      kernel_vma_limit - kernel_vma_start))
+		vm->big_pages = false;
+
+	err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
 					   vm, alloc_name,
 					   kernel_vma_start,
 					   kernel_vma_limit - kernel_vma_start,
@@ -4455,7 +4497,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 					   GPU_BALLOC_MAX_ORDER,
 					   GPU_ALLOC_GVA_SPACE);
 	if (err)
-		goto clean_up_big_allocator;
+		goto clean_up_user_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -4471,17 +4513,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (vm->va_limit > SZ_4G) {
 		err = gk20a_init_sema_pool(vm);
 		if (err)
-			goto clean_up_big_allocator;
+			goto clean_up_user_allocator;
 	}
 
 	return 0;
 
-clean_up_big_allocator:
-	if (large_vma_start < large_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-	if (small_vma_start < small_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+	if (user_vma_start < user_vma_limit)
+		nvgpu_alloc_destroy(&vm->user);
 clean_up_ptes:
 	free_gmmu_pages(vm, &vm->pdb);
 clean_up_pdes:
@@ -4523,9 +4562,10 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
 	vm->as_share = as_share;
 	vm->enable_ctag = true;
 
-	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
+	snprintf(name, sizeof(name), "as_%d", as_share->id);
 
-	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
+	err = gk20a_init_vm(mm, vm, big_page_size,
+			    big_page_size << 10,
 			    mm->channel.kernel_size,
 			    mm->channel.user_size + mm->channel.kernel_size,
 			    !mm->disable_bigpage, userspace_managed, name);
@@ -4586,13 +4626,14 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 		goto clean_up;
 	}
 
-	vma = &vm->vma[pgsz_idx];
+	vma = vm->vma[pgsz_idx];
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
 		if (nvgpu_alloc_initialized(&vm->fixed))
 			vma = &vm->fixed;
 		vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset,
 						(u64)args->pages *
-						(u64)args->page_size);
+						(u64)args->page_size,
+						args->page_size);
 	} else {
 		vaddr_start = nvgpu_alloc(vma,
 					  (u64)args->pages *
@@ -4662,13 +4703,13 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 		args->pages, args->offset);
 
 	/* determine pagesz idx */
-	pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
-			gmmu_page_size_big : gmmu_page_size_small;
+	pgsz_idx = __get_pte_size(vm, args->offset,
+				  args->page_size * args->pages);
 
 	if (nvgpu_alloc_initialized(&vm->fixed))
 		vma = &vm->fixed;
 	else
-		vma = &vm->vma[pgsz_idx];
+		vma = vm->vma[pgsz_idx];
 	nvgpu_free(vma, args->offset);
 
 	mutex_lock(&vm->update_gmmu_lock);
@@ -4853,11 +4894,10 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-	nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
 	if (nvgpu_alloc_initialized(&vm->fixed))
 		nvgpu_alloc_destroy(&vm->fixed);
 
@@ -4908,9 +4948,13 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
-		      mm->bar1.aperture_size - SZ_4K,
-		      mm->bar1.aperture_size, false, false, "bar1");
+	gk20a_init_vm(mm, vm,
+		      big_page_size,
+		      SZ_4K,				/* Low hole */
+		      mm->bar1.aperture_size - SZ_4K,	/* Kernel reserved. */
+		      mm->bar1.aperture_size,
+		      true, false,
+		      "bar1");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
@@ -4932,13 +4976,23 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct mem_desc *inst_block = &mm->pmu.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+	u32 low_hole, aperture_size;
+
+	/*
+	 * No user region - so we will pass that as zero sized.
+	 */
+	low_hole = SZ_4K * 16;
+	aperture_size = GK20A_PMU_VA_SIZE * 2;
 
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
 	gk20a_init_vm(mm, vm, big_page_size,
-		      SZ_4K * 16, GK20A_PMU_VA_SIZE,
-		      GK20A_PMU_VA_SIZE * 2, false, false,
+		      low_hole,
+		      aperture_size - low_hole,
+		      aperture_size,
+		      true,
+		      false,
 		      "system");
 
 	err = gk20a_alloc_inst_block(g, inst_block);