author	Alex Waterman <alexw@nvidia.com>	2017-06-09 14:42:50 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-07-06 17:44:16 -0400
commit	583704620db88e391f6b14acc57af859a70127de (patch)
tree	8fc3becf2850b724e87011b0e0250c52d0efb7ee /drivers/gpu/nvgpu/common/mm/vm.c
parent	c1393d5b68e63c992f4c689cb788139fdf8c2f1a (diff)
gpu: nvgpu: Implement PD packing
In some cases page directories require less than a full page of memory.
For example, on Pascal, the final PD level for large pages is only 256 bytes,
so 16 such PDs fit in a single 4 KiB page. Allocating an entire page for each
of these 256 B PDs is extremely wasteful. This patch alleviates that waste by
packing multiple small PDs into a single page of DMA memory.
The packing is implemented as a slab allocator: each page is a slab, and
multiple PD instances can be allocated from each page. Several modifications
to the nvgpu_gmmu_pd struct were needed to support this. The nvgpu_mem is now
a pointer, and an explicit offset into the nvgpu_mem is stored so that each
nvgpu_gmmu_pd knows which portion of the memory it is using.
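
As a rough illustration, the reshaped struct might look like the sketch
below; this is not the literal header change, and the mem_offs and cached
field names are assumptions based on the description above.

struct nvgpu_gmmu_pd {
	/*
	 * Backing DMA memory. Now a pointer, because several small PDs
	 * may share a single page of memory.
	 */
	struct nvgpu_mem *mem;

	/* Byte offset of this PD within *mem (assumed field name). */
	u32 mem_offs;

	/* Set if this PD was carved from a pd_cache slab (assumed). */
	bool cached;

	/* Child PDs, as walked by nvgpu_vm_free_entries() in the diff. */
	struct nvgpu_gmmu_pd *entries;
	int num_entries;
};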
The nvgpu_pde_phys_addr() and pd_write() functions also required changes,
since a PD is no longer always situated at the start of its nvgpu_mem.
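
A minimal sketch of the adjusted address and write paths, assuming the
mem_offs field from the sketch above; the base-address helper here is a
stand-in for illustration, not necessarily the helper nvgpu uses:

u64 nvgpu_pde_phys_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
{
	/* Base address of the (possibly shared) backing memory... */
	u64 base = nvgpu_mem_base_addr(g, pd->mem); /* stand-in helper */

	/* ...plus this PD's slice within that memory. */
	return base + pd->mem_offs;
}

static void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
		     size_t w, u32 data)
{
	/* The word index must now account for the PD's byte offset. */
	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
}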
Initialization and cleanup of the page tables for each VM were slightly
modified to work through the new pd_cache implementation. Some PDs (i.e. the
PDB), despite being smaller than a full page, still require full-page
alignment (a HW requirement), so a direct allocation method for PDs is still
provided. The direct method is also used when a PD that could in principle
be cached is larger than a page.
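
With that in mind, the allocation decision reduces to something like the
following sketch. The direct-path name mirrors __nvgpu_pd_cache_free_direct()
from the diff below; the slab-path helper is a hypothetical name used only
for illustration.

static int pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
{
	struct gk20a *g = gk20a_from_vm(vm);

	/* PDs of at least a full page get a dedicated DMA allocation. */
	if (bytes >= PAGE_SIZE)
		return __nvgpu_pd_cache_alloc_direct(g, pd, bytes); /* assumed counterpart */

	/*
	 * Small PDs are packed: carve a slice out of a slab page. PDs
	 * that need full-page alignment despite being small (e.g. the
	 * PDB) skip this path and use the direct allocator instead.
	 */
	return pd_cache_alloc_from_slab(g, pd, bytes); /* hypothetical */
}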
Lastly, a new debug flag was added for the pd_cache code.
JIRA NVGPU-30
Change-Id: I64c8037fc356783c1ef203cc143c4d71bbd5d77c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master/r/1506610
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vm.c')
-rw-r--r--	drivers/gpu/nvgpu/common/mm/vm.c	50
1 file changed, 35 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3aeba500..3ed3c7fe 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -35,21 +35,42 @@ int vm_aspace_id(struct vm_gk20a *vm)
 	return vm->as_share ? vm->as_share->id : -1;
 }
 
-static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
-				  struct nvgpu_gmmu_pd *parent,
-				  int level)
+static void __nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				    struct nvgpu_gmmu_pd *pd,
+				    int level)
 {
 	int i;
 
-	if (parent->entries)
-		for (i = 0; i < parent->num_entries; i++)
-			nvgpu_vm_free_entries(vm, &parent->entries[i],
+	if (pd->mem) {
+		__nvgpu_pd_free(vm, pd);
+		pd->mem = NULL;
+	}
+
+	if (pd->entries) {
+		for (i = 0; i < pd->num_entries; i++)
+			__nvgpu_vm_free_entries(vm, &pd->entries[i],
 					      level + 1);
+		nvgpu_vfree(vm->mm->g, pd->entries);
+		pd->entries = NULL;
+	}
+}
+
+static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				  struct nvgpu_gmmu_pd *pdb)
+{
+	struct gk20a *g = vm->mm->g;
+	int i;
+
+	__nvgpu_pd_cache_free_direct(g, pdb);
+
+	if (!pdb->entries)
+		return;
+
+	for (i = 0; i < pdb->num_entries; i++)
+		__nvgpu_vm_free_entries(vm, &pdb->entries[i], 1);
 
-	if (parent->mem.size)
-		nvgpu_free_gmmu_pages(vm, parent);
-	nvgpu_vfree(vm->mm->g, parent->entries);
-	parent->entries = NULL;
+	nvgpu_vfree(g, pdb->entries);
+	pdb->entries = NULL;
 }
 
 u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
@@ -110,7 +131,7 @@ void nvgpu_vm_mapping_batch_finish_locked(
 
 	if (mapping_batch->need_tlb_invalidate) {
 		struct gk20a *g = gk20a_from_vm(vm);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	}
 }
 
@@ -407,9 +428,8 @@ clean_up_allocators:
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 clean_up_page_tables:
-	/* Cleans up nvgpu_vm_init_page_tables() */
-	nvgpu_vfree(g, vm->pdb.entries);
-	nvgpu_free_gmmu_pages(vm, &vm->pdb);
+	/* Cleans up nvgpu_gmmu_init_page_table() */
+	__nvgpu_pd_cache_free_direct(g, &vm->pdb);
 clean_up_vgpu_vm:
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
@@ -525,7 +545,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 
-	nvgpu_vm_free_entries(vm, &vm->pdb, 0);
+	nvgpu_vm_free_entries(vm, &vm->pdb);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)