path: root/drivers/gpu/nvgpu/common/mm/vm.c
author     Alex Waterman <alexw@nvidia.com>                     2017-06-09 14:42:50 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-07-06 17:44:16 -0400
commit     583704620db88e391f6b14acc57af859a70127de (patch)
tree       8fc3becf2850b724e87011b0e0250c52d0efb7ee /drivers/gpu/nvgpu/common/mm/vm.c
parent     c1393d5b68e63c992f4c689cb788139fdf8c2f1a (diff)
gpu: nvgpu: Implement PD packing
In some cases page directories require less than a full page of memory. For example, on Pascal, the final PD level for large pages is only 256 bytes; thus 16 PDs can fit in a single page. Allocating an entire page for each of these 256 B PDs is extremely wasteful.

This patch aims to alleviate the DMA memory wasted by placing small PDs in a full page by packing multiple small PDs into a single page.

The packing is implemented as a slab allocator - each page is a slab and from each page multiple PD instances can be allocated. Several modifications to the nvgpu_gmmu_pd struct also needed to be made to support this: the nvgpu_mem is now a pointer and there's an explicit offset into the nvgpu_mem struct so that each nvgpu_gmmu_pd knows what portion of the memory it's using.

The nvgpu_pde_phys_addr() and pd_write() functions also require changes since the PD is no longer always situated at the start of the nvgpu_mem.

Initialization and cleanup of the page tables for each VM was slightly modified to work through the new pd_cache implementation.

Some PDs (i.e. the PDB), despite not being a full page, still require a full page for alignment purposes (HW requirements). Thus a direct allocation method for PDs is still provided. This is also used when a PD that could in principle be cached is greater than a page in size.

Lastly a new debug flag was added for the pd_cache code.

JIRA NVGPU-30

Change-Id: I64c8037fc356783c1ef203cc143c4d71bbd5d77c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master/r/1506610
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
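The struct change described above (nvgpu_mem becoming a pointer plus an explicit offset) can be pictured with a short sketch. The names below are illustrative assumptions, not copied from the nvgpu headers; the real definitions live in the gmmu/pd_cache code this series touches.

/*
 * Hypothetical shape of a packed PD descriptor: the backing nvgpu_mem may
 * be shared by several small PDs, so each PD records its byte offset into
 * that memory. Field names here are for illustration only.
 */
struct pd_sketch {
	struct nvgpu_mem *mem;     /* Backing DMA memory; possibly a shared slab page. */
	u32 mem_offs;              /* Byte offset of this PD within *mem. */
	bool cached;               /* True if carved out of a pd_cache slab page. */

	struct pd_sketch *entries; /* Next-level PDs (absent at the PTE level). */
	int num_entries;
};

/*
 * Because a PD is no longer guaranteed to start at offset 0 of its
 * nvgpu_mem, address and write helpers must fold in the offset. A sketch
 * only; the driver's real helpers are nvgpu_pde_phys_addr() and pd_write(),
 * whose internals are not shown here.
 */
static inline u64 pd_sketch_addr(u64 mem_base_addr, struct pd_sketch *pd)
{
	return mem_base_addr + pd->mem_offs;
}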
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vm.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm.c  50
1 file changed, 35 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3aeba500..3ed3c7fe 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -35,21 +35,42 @@ int vm_aspace_id(struct vm_gk20a *vm)
 	return vm->as_share ? vm->as_share->id : -1;
 }
 
-static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
-				  struct nvgpu_gmmu_pd *parent,
-				  int level)
+static void __nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				    struct nvgpu_gmmu_pd *pd,
+				    int level)
 {
 	int i;
 
-	if (parent->entries)
-		for (i = 0; i < parent->num_entries; i++)
-			nvgpu_vm_free_entries(vm, &parent->entries[i],
-					      level + 1);
+	if (pd->mem) {
+		__nvgpu_pd_free(vm, pd);
+		pd->mem = NULL;
+	}
+
+	if (pd->entries) {
+		for (i = 0; i < pd->num_entries; i++)
+			__nvgpu_vm_free_entries(vm, &pd->entries[i],
+						level + 1);
+		nvgpu_vfree(vm->mm->g, pd->entries);
+		pd->entries = NULL;
+	}
+}
+
+static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				  struct nvgpu_gmmu_pd *pdb)
+{
+	struct gk20a *g = vm->mm->g;
+	int i;
+
+	__nvgpu_pd_cache_free_direct(g, pdb);
+
+	if (!pdb->entries)
+		return;
+
+	for (i = 0; i < pdb->num_entries; i++)
+		__nvgpu_vm_free_entries(vm, &pdb->entries[i], 1);
 
-	if (parent->mem.size)
-		nvgpu_free_gmmu_pages(vm, parent);
-	nvgpu_vfree(vm->mm->g, parent->entries);
-	parent->entries = NULL;
+	nvgpu_vfree(g, pdb->entries);
+	pdb->entries = NULL;
 }
 
 u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
@@ -110,7 +131,7 @@ void nvgpu_vm_mapping_batch_finish_locked(
 
 	if (mapping_batch->need_tlb_invalidate) {
 		struct gk20a *g = gk20a_from_vm(vm);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	}
 }
 
@@ -407,9 +428,8 @@ clean_up_allocators:
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 clean_up_page_tables:
-	/* Cleans up nvgpu_vm_init_page_tables() */
-	nvgpu_vfree(g, vm->pdb.entries);
-	nvgpu_free_gmmu_pages(vm, &vm->pdb);
+	/* Cleans up nvgpu_gmmu_init_page_table() */
+	__nvgpu_pd_cache_free_direct(g, &vm->pdb);
 clean_up_vgpu_vm:
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
@@ -525,7 +545,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 
-	nvgpu_vm_free_entries(vm, &vm->pdb, 0);
+	nvgpu_vm_free_entries(vm, &vm->pdb);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)