author	Alex Waterman <alexw@nvidia.com>	2017-06-09 14:42:50 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-07-06 17:44:16 -0400
commit	583704620db88e391f6b14acc57af859a70127de (patch)
tree	8fc3becf2850b724e87011b0e0250c52d0efb7ee /drivers/gpu/nvgpu/common/mm/vm.c
parent	c1393d5b68e63c992f4c689cb788139fdf8c2f1a (diff)
gpu: nvgpu: Implement PD packing
In some cases page directories require less than a full page of memory.
For example, on Pascal, the final PD level for large pages is only 256 bytes,
so 16 such PDs fit in a single 4 KiB page. Allocating an entire page for each
of these 256 B PDs is extremely wasteful. This patch alleviates that waste by
packing multiple small PDs into a single page of DMA memory.
The packing is implemented as a slab allocator: each page is a slab, and
multiple PD instances can be allocated from each page. Several modifications
to the nvgpu_gmmu_pd struct were needed to support this. The nvgpu_mem is now
a pointer, and an explicit offset into the nvgpu_mem is stored so that each
nvgpu_gmmu_pd knows which portion of the memory it is using.
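
As a rough illustration, the reshaped struct might look like the sketch
below; this is not the literal header change, and the mem_offs and cached
field names are assumptions based on the description above.

struct nvgpu_gmmu_pd {
	/*
	 * Backing DMA memory. Now a pointer, because several small PDs
	 * may share a single page of memory.
	 */
	struct nvgpu_mem *mem;

	/* Byte offset of this PD within *mem (assumed field name). */
	u32 mem_offs;

	/* Set if this PD was carved from a pd_cache slab (assumed). */
	bool cached;

	/* Child PDs, as walked by nvgpu_vm_free_entries() in the diff. */
	struct nvgpu_gmmu_pd *entries;
	int num_entries;
};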
The nvgpu_pde_phys_addr() and pd_write() functions also required changes,
since a PD is no longer always situated at the start of its nvgpu_mem.
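
A minimal sketch of the adjusted address and write paths, assuming the
mem_offs field from the sketch above; the base-address helper here is a
stand-in for illustration, not necessarily the helper nvgpu uses:

u64 nvgpu_pde_phys_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
{
	/* Base address of the (possibly shared) backing memory... */
	u64 base = nvgpu_mem_base_addr(g, pd->mem); /* stand-in helper */

	/* ...plus this PD's slice within that memory. */
	return base + pd->mem_offs;
}

static void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
		     size_t w, u32 data)
{
	/* The word index must now account for the PD's byte offset. */
	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
}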
Initialization and cleanup of the page tables for each VM were slightly
modified to work through the new pd_cache implementation. Some PDs (i.e. the
PDB), despite being smaller than a full page, still require full-page
alignment (a HW requirement), so a direct allocation method for PDs is still
provided. The direct method is also used when a PD that could in principle
be cached is larger than a page.
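
With that in mind, the allocation decision reduces to something like the
following sketch. The direct-path name mirrors __nvgpu_pd_cache_free_direct()
from the diff below; the slab-path helper is a hypothetical name used only
for illustration.

static int pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
{
	struct gk20a *g = gk20a_from_vm(vm);

	/* PDs of at least a full page get a dedicated DMA allocation. */
	if (bytes >= PAGE_SIZE)
		return __nvgpu_pd_cache_alloc_direct(g, pd, bytes); /* assumed counterpart */

	/*
	 * Small PDs are packed: carve a slice out of a slab page. PDs
	 * that need full-page alignment despite being small (e.g. the
	 * PDB) skip this path and use the direct allocator instead.
	 */
	return pd_cache_alloc_from_slab(g, pd, bytes); /* hypothetical */
}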
Lastly, a new debug flag was added for the pd_cache code.
JIRA NVGPU-30
Change-Id: I64c8037fc356783c1ef203cc143c4d71bbd5d77c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master/r/1506610
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vm.c')
-rw-r--r--	drivers/gpu/nvgpu/common/mm/vm.c	50
1 file changed, 35 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3aeba500..3ed3c7fe 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -35,21 +35,42 @@ int vm_aspace_id(struct vm_gk20a *vm)
 	return vm->as_share ? vm->as_share->id : -1;
 }
 
-static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
-				  struct nvgpu_gmmu_pd *parent,
-				  int level)
+static void __nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				    struct nvgpu_gmmu_pd *pd,
+				    int level)
 {
 	int i;
 
-	if (parent->entries)
-		for (i = 0; i < parent->num_entries; i++)
-			nvgpu_vm_free_entries(vm, &parent->entries[i],
+	if (pd->mem) {
+		__nvgpu_pd_free(vm, pd);
+		pd->mem = NULL;
+	}
+
+	if (pd->entries) {
+		for (i = 0; i < pd->num_entries; i++)
+			__nvgpu_vm_free_entries(vm, &pd->entries[i],
 					      level + 1);
+		nvgpu_vfree(vm->mm->g, pd->entries);
+		pd->entries = NULL;
+	}
+}
+
+static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				  struct nvgpu_gmmu_pd *pdb)
+{
+	struct gk20a *g = vm->mm->g;
+	int i;
+
+	__nvgpu_pd_cache_free_direct(g, pdb);
+
+	if (!pdb->entries)
+		return;
+
+	for (i = 0; i < pdb->num_entries; i++)
+		__nvgpu_vm_free_entries(vm, &pdb->entries[i], 1);
 
-	if (parent->mem.size)
-		nvgpu_free_gmmu_pages(vm, parent);
-	nvgpu_vfree(vm->mm->g, parent->entries);
-	parent->entries = NULL;
+	nvgpu_vfree(g, pdb->entries);
+	pdb->entries = NULL;
 }
 
 u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
@@ -110,7 +131,7 @@ void nvgpu_vm_mapping_batch_finish_locked(
 
 	if (mapping_batch->need_tlb_invalidate) {
 		struct gk20a *g = gk20a_from_vm(vm);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	}
 }
 
@@ -407,9 +428,8 @@ clean_up_allocators:
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 clean_up_page_tables:
-	/* Cleans up nvgpu_vm_init_page_tables() */
-	nvgpu_vfree(g, vm->pdb.entries);
-	nvgpu_free_gmmu_pages(vm, &vm->pdb);
+	/* Cleans up nvgpu_gmmu_init_page_table() */
+	__nvgpu_pd_cache_free_direct(g, &vm->pdb);
 clean_up_vgpu_vm:
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
@@ -525,7 +545,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 
-	nvgpu_vm_free_entries(vm, &vm->pdb, 0);
+	nvgpu_vm_free_entries(vm, &vm->pdb);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)