author	Alex Waterman <alexw@nvidia.com>	2015-06-17 13:31:08 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-01-31 19:23:07 -0500
commit	d630f1d99f60b1c2ec87506a2738bac4d1895b07 (patch)
tree	5b9cad58f585424a64e7b675d503a87bbcada254	/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
parent	793791ebb7ddbb34f0aaf3e300b24ed24aa76661 (diff)
gpu: nvgpu: Unify the small and large page address spaces
The basic structure of this patch is to make the small page allocator and the
large page allocator into pointers (where they used to be just structs), and
then assign each of those pointers to the same actual allocator, since the
buddy allocator has supported mixed page sizes since its inception. For the
rest of the driver, some changes had to be made in order to actually support
mixed pages in a single address space.

1. Unifying the allocation page size determination

Since the allocation and map operations happen at distinct times, both mapping
and allocation of GVA space must agree on page size. This is because
allocations of different page sizes have to be kept in separate PDEs, to avoid
the necessity of supporting mixed-page-size PDEs. To this end a function,
__get_pte_size(), was introduced; it is used both by the balloc code and the
core GPU MM code, and it determines page size based only on the length of the
mapping/allocation.

2. Fixed address allocation + page size

Similar to regular mappings/GVA allocations, fixed address mapping page size
determination had to be modified. In the past the address of the mapping
determined the page size, since the address space was split by address (low
addresses were small pages, high addresses large pages). Since that is no
longer the case, the page size field in the reserve memory ioctl is now
honored by the mapping code. When, for instance, CUDA makes a memory
reservation, it specifies small or large pages; when CUDA then requests
mappings within that address range, the page size is looked up in the
reserved memory struct.

Fixed address reservations were also modified to always allocate at PDE
granularity (64M or 128M, depending on the large page size). This prevents
non-fixed allocations from ending up in the same PDE and causing kernel
panics or GMMU faults.

3. The rest...

The rest of the changes are just by-products of the above: lots of places
required minor updates to use a pointer to the GVA allocator struct instead
of the struct itself.

Lastly, this change is not truly complete. More work remains to be done in
order to fully remove the notion that there was such a thing as separate
address spaces for different page sizes. Basically, what remains after this
patch is cleanup and proper documentation.

Bug 1396644
Bug 1729947

Change-Id: If51ab396a37ba16c69e434adb47edeef083dce57
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265300
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
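A minimal sketch of the page-size-by-length idea from point 1, assuming only
the names visible elsewhere in this patch (vm->big_pages, vm->gmmu_page_sizes[],
the gmmu_page_size_* indices); the real __get_pte_size() is not part of this
file's diff, so the body below is illustrative only:

static u32 __get_pte_size_sketch(struct vm_gk20a *vm, u64 size)
{
	/*
	 * Choose the page size from the mapping/allocation length alone:
	 * big pages only when the VM supports them and the request covers
	 * at least one full big page.
	 */
	if (vm->big_pages &&
	    size >= vm->gmmu_page_sizes[gmmu_page_size_big])
		return gmmu_page_size_big;

	return gmmu_page_size_small;
}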
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/mm_vgpu.c')
-rw-r--r--	drivers/gpu/nvgpu/vgpu/mm_vgpu.c	115
1 file changed, 49 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 66c9344b..a21a020d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -227,11 +227,12 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->fixed))
+		nvgpu_alloc_destroy(&vm->fixed);
 
 	mutex_unlock(&vm->update_gmmu_lock);
 
@@ -273,8 +274,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
-	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-		kernel_vma_start, kernel_vma_limit;
+	u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
 	char name[32];
 	int err, i;
 	const bool userspace_managed =
@@ -306,6 +306,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->mm = mm;
 	vm->as_share = as_share;
 
+	/* Set up vma pointers. */
+	vm->vma[0] = &vm->user;
+	vm->vma[1] = &vm->user;
+	vm->vma[2] = &vm->kernel;
+
 	for (i = 0; i < gmmu_nr_page_sizes; i++)
 		vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
 
@@ -328,93 +333,74 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->handle = p->handle;
 
 	/* setup vma limits */
-	small_vma_start = vm->va_start;
-
-	if (vm->big_pages) {
-		/* First 16GB of the address space goes towards small
-		 * pages. The kernel reserved pages are at the end.
-		 * What ever remains is allocated to large pages.
-		 */
-		small_vma_limit = __nv_gmmu_va_small_page_limit();
-		large_vma_start = small_vma_limit;
-		large_vma_limit = vm->va_limit - mm->channel.kernel_size;
-	} else {
-		small_vma_limit = vm->va_limit - mm->channel.kernel_size;
-		large_vma_start = 0;
-		large_vma_limit = 0;
-	}
+	user_vma_start = vm->va_start;
+	user_vma_limit = vm->va_limit - mm->channel.kernel_size;
 
 	kernel_vma_start = vm->va_limit - mm->channel.kernel_size;
 	kernel_vma_limit = vm->va_limit;
 
 	gk20a_dbg_info(
-		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-		small_vma_start, small_vma_limit,
-		large_vma_start, large_vma_limit,
+		"user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		user_vma_start, user_vma_limit,
 		kernel_vma_start, kernel_vma_limit);
 
-	/* check that starts do not exceed limits */
-	WARN_ON(small_vma_start > small_vma_limit);
-	WARN_ON(large_vma_start > large_vma_limit);
-	/* kernel_vma must also be non-zero */
+	WARN_ON(user_vma_start > user_vma_limit);
 	WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-	if (small_vma_start > small_vma_limit ||
-	    large_vma_start > large_vma_limit ||
+	if (user_vma_start > user_vma_limit ||
 	    kernel_vma_start >= kernel_vma_limit) {
 		err = -EINVAL;
 		goto clean_up_share;
 	}
 
-	if (small_vma_start < small_vma_limit) {
+	if (user_vma_start < user_vma_limit) {
 		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 			 gmmu_page_sizes[gmmu_page_size_small] >> 10);
+		if (!gk20a_big_pages_possible(vm, user_vma_start,
+					      user_vma_limit - user_vma_start))
+			vm->big_pages = false;
 
 		err = __nvgpu_buddy_allocator_init(
 			g,
-			&vm->vma[gmmu_page_size_small],
+			vm->vma[gmmu_page_size_small],
 			vm, name,
-			small_vma_start,
-			small_vma_limit - small_vma_start,
+			user_vma_start,
+			user_vma_limit - user_vma_start,
 			SZ_4K,
 			GPU_BALLOC_MAX_ORDER,
 			GPU_ALLOC_GVA_SPACE);
 		if (err)
 			goto clean_up_share;
-	}
-
-	if (large_vma_start < large_vma_limit) {
-		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-			 gmmu_page_sizes[gmmu_page_size_big] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_big],
-			vm, name,
-			large_vma_start,
-			large_vma_limit - large_vma_start,
-			big_page_size,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_small_allocator;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
 	}
 
 	snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
 		 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+				      kernel_vma_limit - kernel_vma_start))
+		vm->big_pages = false;
+
 	/*
 	 * kernel reserved VMA is at the end of the aperture
 	 */
 	err = __nvgpu_buddy_allocator_init(
 		g,
-		&vm->vma[gmmu_page_size_kernel],
+		vm->vma[gmmu_page_size_kernel],
 		vm, name,
 		kernel_vma_start,
 		kernel_vma_limit - kernel_vma_start,
 		SZ_4K,
 		GPU_BALLOC_MAX_ORDER,
 		GPU_ALLOC_GVA_SPACE);
 	if (err)
-		goto clean_up_big_allocator;
+		goto clean_up_user_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -426,12 +412,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 	return 0;
 
-clean_up_big_allocator:
-	if (large_vma_start < large_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-	if (small_vma_start < small_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+	if (user_vma_start < user_vma_limit)
+		nvgpu_alloc_destroy(&vm->user);
 clean_up_share:
 	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
 	msg.handle = vgpu_get_handle(g);
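With the unified VMAs above, vm->vma[gmmu_page_size_small] and
vm->vma[gmmu_page_size_big] both point at vm->user (or at vm->kernel when
there is no user VMA), so an allocation path only needs the page-size index
to pick an allocator. A hypothetical usage sketch, assuming the
__get_pte_size_sketch() helper shown earlier and the nvgpu_alloc() allocator
entry point; it is not code from this patch:

static u64 gva_alloc_sketch(struct vm_gk20a *vm, u64 size)
{
	/* Page size is derived from the length only. */
	u32 pgsz_idx = __get_pte_size_sketch(vm, size);

	/* Small and big indices now share one buddy allocator. */
	struct nvgpu_allocator *vma = vm->vma[pgsz_idx];

	return nvgpu_alloc(vma, size);
}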