From 583704620db88e391f6b14acc57af859a70127de Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Fri, 9 Jun 2017 11:42:50 -0700
Subject: gpu: nvgpu: Implement PD packing

In some cases page directories require less than a full page of memory.
For example, on Pascal, the final PD level for large pages is only 256
bytes; thus 16 PDs can fit in a single page. To allocate an entire page
for each of these 256 B PDs is extremely wasteful.

This patch aims to alleviate the wasted DMA memory from having small PDs
in a full page by packing multiple small PDs into a single page.

The packing is implemented as a slab allocator - each page is a slab
and from each page multiple PD instances can be allocated.

Several modifications to the nvgpu_gmmu_pd struct also needed to be made
to support this. The nvgpu_mem is now a pointer and there's an explicit
offset into the nvgpu_mem struct so that each nvgpu_gmmu_pd knows what
portion of the memory it's using.

The nvgpu_pde_phys_addr() function and the pd_write() functions also
require some changes since the PD is no longer always situated at the
start of the nvgpu_mem.

Initialization and cleanup of the page tables for each VM was slightly
modified to work through the new pd_cache implementation.

Some PDs (i.e. the PDB), despite not being a full page, still require a
full page for alignment purposes (HW requirements). Thus a direct
allocation method for PDs is still provided. This is also used when a PD
that could in principle be cached is greater than a page in size.

Lastly a new debug flag was added for the pd_cache code.

JIRA NVGPU-30

Change-Id: I64c8037fc356783c1ef203cc143c4d71bbd5d77c
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master/r/1506610
Reviewed-by: Terje Bergstrom
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/common/mm/gmmu.c     |  81 +++----
 drivers/gpu/nvgpu/common/mm/pd_cache.c | 426 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/common/mm/vm.c       |  50 ++--
 3 files changed, 499 insertions(+), 58 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/mm/pd_cache.c

(limited to 'drivers/gpu/nvgpu/common')

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index ec1bc095..602dfb3b 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -45,7 +45,8 @@ static int pd_allocate(struct vm_gk20a *vm,
 		       struct nvgpu_gmmu_pd *pd,
 		       const struct gk20a_mmu_level *l,
 		       struct nvgpu_gmmu_attrs *attrs);
-
+static u32 pd_size(const struct gk20a_mmu_level *l,
+		   struct nvgpu_gmmu_attrs *attrs);
 /*
  * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
  * VA will be allocated for you. If addr is non-zero then the buffer will be
@@ -138,6 +139,9 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
 
 int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 {
+	u32 pdb_size;
+	int err;
+
 	/*
 	 * Need this just for page size. Everything else can be ignored. Also
 	 * note that we can just use pgsz 0 (i.e small pages) since the number
@@ -148,56 +152,43 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 		.pgsz = 0,
 	};
 
-	return pd_allocate(vm, &vm->pdb, &vm->mmu_levels[0], &attrs);
-}
+	/*
+	 * PDB size here must be one page so that its address is page size
+	 * aligned. Although lower PDE tables can be aligned at 256B boundaries,
+	 * the main PDB must be page aligned.
+	 */
+	pdb_size = ALIGN(pd_size(&vm->mmu_levels[0], &attrs), PAGE_SIZE);
+
+	err = __nvgpu_pd_cache_alloc_direct(vm->mm->g, &vm->pdb, pdb_size);
+	if (WARN_ON(err))
+		return err;
+	/*
+	 * One mb() is done after all mapping operations. Don't need individual
+	 * barriers for each PD write.
+	 */
+	vm->pdb.mem->skip_wmb = true;
+
+	return 0;
+}
 
 /*
  * Ensure that there's a CPU mapping for the page directory memory. This won't
  * always be the case for 32 bit systems since we may need to save kernel
  * virtual memory.
  */
-static int map_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *entry)
+static int map_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
 {
-	return nvgpu_mem_begin(g, &entry->mem);
+	return nvgpu_mem_begin(g, pd->mem);
 }
 
 /*
  * Handle any necessary CPU unmap semantics for a page directories DMA memory.
  * For 64 bit platforms this is a noop.
  */
-static void unmap_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *entry)
-{
-	nvgpu_mem_end(g, &entry->mem);
-}
-
-static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 bytes,
-				  struct nvgpu_gmmu_pd *pd)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	unsigned long flags = NVGPU_DMA_FORCE_CONTIGUOUS;
-	int err;
-
-	/*
-	 * On arm32 vmalloc space is a precious commodity so we do not map pages
-	 * by default.
-	 */
-	if (!IS_ENABLED(CONFIG_ARM64))
-		flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
-
-	err = nvgpu_dma_alloc_flags(g, flags, bytes, &pd->mem);
-	if (err)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
-			   struct nvgpu_gmmu_pd *pd)
+static void unmap_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
 {
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	nvgpu_dma_free(g, &pd->mem);
+	nvgpu_mem_end(g, pd->mem);
 }
 
 /*
@@ -205,10 +196,14 @@ void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
  */
 u64 nvgpu_pde_phys_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
 {
+	u64 page_addr;
+
 	if (g->mm.has_physical_mode)
-		return sg_phys(pd->mem.priv.sgt->sgl);
+		page_addr = sg_phys(pd->mem->priv.sgt->sgl);
 	else
-		return nvgpu_mem_get_base_addr(g, &pd->mem, 0);
+		page_addr = nvgpu_mem_get_base_addr(g, pd->mem, 0);
+
+	return page_addr + pd->mem_offs;
 }
 
 /*
@@ -254,10 +249,10 @@ static int pd_allocate(struct vm_gk20a *vm,
 {
 	int err;
 
-	if (pd->mem.size)
+	if (pd->mem)
 		return 0;
 
-	err = nvgpu_alloc_gmmu_pages(vm, pd_size(l, attrs), pd);
+	err = __nvgpu_pd_alloc(vm, pd, pd_size(l, attrs));
 	if (err) {
 		nvgpu_info(vm->mm->g, "error allocating page directory!");
 		return err;
@@ -267,7 +262,7 @@ static int pd_allocate(struct vm_gk20a *vm,
 	 * One mb() is done after all mapping operations. Don't need individual
 	 * barriers for each PD write.
 	 */
-	pd->mem.skip_wmb = true;
+	pd->mem->skip_wmb = true;
 
 	return 0;
 }
@@ -778,7 +773,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	}
 
 	if (!batch)
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	else
 		batch->need_tlb_invalidate = true;
 
@@ -830,7 +825,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 
 	if (!batch) {
 		gk20a_mm_l2_flush(g, true);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	} else {
 		if (!batch->gpu_l2_flushed) {
 			gk20a_mm_l2_flush(g, true);
diff --git a/drivers/gpu/nvgpu/common/mm/pd_cache.c b/drivers/gpu/nvgpu/common/mm/pd_cache.c
new file mode 100644
index 00000000..4f312eff
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/pd_cache.c
@@ -0,0 +1,426 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+#define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args)
+
+/**
+ * DOC: PD cache
+ *
+ * In the name of saving memory with the many sub-page sized PD levels in
+ * Pascal and beyond, a way of packing PD tables together is necessary. This
+ * code does just that. If a PD table only requires 1024 bytes, then it is
+ * possible to have 4 of these PDs in one page. This is even more pronounced
+ * for 256 byte PD tables.
+ *
+ * The pd cache is basically just a slab allocator. Each instance of the nvgpu
+ * driver makes one of these structs:
+ *
+ * struct nvgpu_pd_cache {
+ *	struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
+ *	struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];
+ *
+ *	struct nvgpu_rbtree_node *mem_tree;
+ * };
+ *
+ * There are two sets of lists, the full and the partial. The full lists
+ * contain pages of memory for which all the memory in that page is in use.
+ * The partial lists contain partially full pages of memory which can be used
+ * for more PD allocations. There are a couple of assumptions here:
+ *
+ * 1. PDs greater than or equal to the page size bypass the pd cache.
+ * 2. PDs are always a power of 2 in size and at least %NVGPU_PD_CACHE_MIN
+ *    bytes.
+ *
+ * There are NVGPU_PD_CACHE_COUNT full lists and the same number of partial
+ * lists. For a 4KB page NVGPU_PD_CACHE_COUNT is 4. This is enough space for
+ * 256, 512, 1024, and 2048 byte PDs.
+ *
+ * __nvgpu_pd_alloc() will allocate a PD for the GMMU. It will check if the PD
+ * size is page size or larger and choose the correct allocation scheme -
+ * either from the PD cache or directly. Similarly __nvgpu_pd_free() will free
+ * a PD allocated by __nvgpu_pd_alloc().
+ *
+ * Since the top level PD (the PDB) is a page aligned pointer but less than a
+ * page in size, the direct functions must be used for allocating PDBs.
+ * Otherwise there would be alignment issues for the PDBs when they get packed.
+ */
+
+static u32 nvgpu_pd_cache_nr(u32 bytes)
+{
+	return ilog2(bytes >> (NVGPU_PD_CACHE_MIN_SHIFT - 1));
+}
+
+static u32 nvgpu_pd_cache_get_mask(struct nvgpu_pd_mem_entry *pentry)
+{
+	u32 mask_offset = 1 << (PAGE_SIZE / pentry->pd_size);
+
+	return mask_offset - 1;
+}
+
+int nvgpu_pd_cache_init(struct gk20a *g)
+{
+	struct nvgpu_pd_cache *cache;
+	int i;
+
+	/*
+	 * This gets called from finalize_poweron() so we need to make sure we
+	 * don't reinit the pd_cache over and over.
+	 */
+	if (g->mm.pd_cache)
+		return 0;
+
+	cache = nvgpu_kzalloc(g, sizeof(*cache));
+	if (!cache) {
+		nvgpu_err(g, "Failed to alloc pd_cache!");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
+		nvgpu_init_list_node(&cache->full[i]);
+		nvgpu_init_list_node(&cache->partial[i]);
+	}
+
+	cache->mem_tree = NULL;
+	g->mm.pd_cache = cache;
+	nvgpu_mutex_init(&cache->lock);
+
+	pd_dbg(g, "PD cache initialized!");
+
+	return 0;
+}
+
+void nvgpu_pd_cache_fini(struct gk20a *g)
+{
+	int i;
+	struct nvgpu_pd_cache *cache = g->mm.pd_cache;
+
+	if (!cache)
+		return;
+
+	for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
+		WARN_ON(!nvgpu_list_empty(&cache->full[i]));
+		WARN_ON(!nvgpu_list_empty(&cache->partial[i]));
+	}
+
+	nvgpu_kfree(g, g->mm.pd_cache);
+}
+
+/*
+ * This is the simple pass-through for page sized or larger PDs.
+ *
+ * Note: this does not need the cache lock since it does not modify any of the
+ * PD cache data structures.
+ */
+int __nvgpu_pd_cache_alloc_direct(struct gk20a *g,
+				  struct nvgpu_gmmu_pd *pd, u32 bytes)
+{
+	int err;
+
+	pd_dbg(g, "PD-Alloc [D] %u bytes", bytes);
+
+	pd->mem = nvgpu_kzalloc(g, sizeof(*pd->mem));
+	if (!pd->mem) {
+		pd_dbg(g, "OOM allocating nvgpu_mem struct!");
+		return -ENOMEM;
+	}
+
+	err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_FORCE_CONTIGUOUS,
+				    bytes, pd->mem);
+	if (err) {
+		pd_dbg(g, "OOM allocating page directory!");
+		nvgpu_kfree(g, pd->mem);
+		return -ENOMEM;
+	}
+
+	pd->cached = false;
+	pd->mem_offs = 0;
+
+	return 0;
+}
+
+/*
+ * Make a new nvgpu_pd_mem_entry and allocate a PD from it. Update the passed
+ * pd to reflect this allocation.
+ */
+static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
+				    struct nvgpu_pd_cache *cache,
+				    struct nvgpu_gmmu_pd *pd,
+				    u32 bytes)
+{
+	struct nvgpu_pd_mem_entry *pentry;
+
+	pd_dbg(g, "PD-Alloc [C] New: offs=0");
+
+	pentry = nvgpu_kzalloc(g, sizeof(*pentry));
+	if (!pentry) {
+		pd_dbg(g, "OOM allocating pentry!");
+		return -ENOMEM;
+	}
+
+	if (nvgpu_dma_alloc_flags(g, NVGPU_DMA_FORCE_CONTIGUOUS,
+				  PAGE_SIZE, &pentry->mem)) {
+		nvgpu_kfree(g, pentry);
+		pd_dbg(g, "Unable to DMA alloc!");
+		return -ENOMEM;
+	}
+
+	pentry->pd_size = bytes;
+	nvgpu_list_add(&pentry->list_entry,
+		       &cache->partial[nvgpu_pd_cache_nr(bytes)]);
+
+	/*
+	 * This allocates the very first PD table in the set of tables in this
+	 * nvgpu_pd_mem_entry.
+	 */
+	pentry->alloc_map = 1;
+
+	/*
+	 * Now update the nvgpu_gmmu_pd to reflect this allocation.
+	 */
+	pd->mem = &pentry->mem;
+	pd->mem_offs = 0;
+	pd->cached = true;
+
+	pentry->tree_entry.key_start = (u64)(uintptr_t)&pentry->mem;
+	nvgpu_rbtree_insert(&pentry->tree_entry, &cache->mem_tree);
+
+	return 0;
+}
+
+static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g,
+					     struct nvgpu_pd_cache *cache,
+					     struct nvgpu_pd_mem_entry *pentry,
+					     struct nvgpu_gmmu_pd *pd)
+{
+	unsigned long bit_offs;
+	u32 mem_offs;
+	u32 pentry_mask = nvgpu_pd_cache_get_mask(pentry);
+
+	/*
+	 * Find and allocate an open PD.
+	 */
+	bit_offs = ffz(pentry->alloc_map);
+	mem_offs = bit_offs * pentry->pd_size;
+
+	/* Bit map full. Something's wrong. */
+	if (WARN_ON(bit_offs >= ffz(pentry_mask)))
+		return -ENOMEM;
+
+	pentry->alloc_map |= 1 << bit_offs;
+
+	pd_dbg(g, "PD-Alloc [C] Partial: offs=%lu", bit_offs);
+
+	/*
+	 * First update the pd.
+	 */
+	pd->mem = &pentry->mem;
+	pd->mem_offs = mem_offs;
+	pd->cached = true;
+
+	/*
+	 * Now make sure the pentry is in the correct list (full vs partial).
+	 */
+	if ((pentry->alloc_map & pentry_mask) == pentry_mask) {
+		pd_dbg(g, "Adding pentry to full list!");
+		nvgpu_list_del(&pentry->list_entry);
+		nvgpu_list_add(&pentry->list_entry,
+			       &cache->full[nvgpu_pd_cache_nr(pentry->pd_size)]);
+	}
+
+	return 0;
+}
+
+/*
+ * Get a partially full nvgpu_pd_mem_entry. Returns NULL if there is no
+ * partial nvgpu_pd_mem_entry available.
+ */
+static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_get_partial(
+	struct nvgpu_pd_cache *cache, u32 bytes)
+{
+	struct nvgpu_list_node *list =
+		&cache->partial[nvgpu_pd_cache_nr(bytes)];
+
+	if (nvgpu_list_empty(list))
+		return NULL;
+
+	return nvgpu_list_first_entry(list,
+				      nvgpu_pd_mem_entry,
+				      list_entry);
+}
+
+/*
+ * Allocate memory from an nvgpu_mem for the page directory.
+ */
+static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
+				struct nvgpu_gmmu_pd *pd, u32 bytes)
+{
+	struct nvgpu_pd_mem_entry *pentry;
+	int err;
+
+	pd_dbg(g, "PD-Alloc [C] %u bytes", bytes);
+
+	if (bytes & (bytes - 1) ||
+	    (bytes >= PAGE_SIZE ||
+	     bytes < NVGPU_PD_CACHE_MIN)) {
+		pd_dbg(g, "PD-Alloc [C] Invalid (bytes=%u)!", bytes);
+		return -EINVAL;
+	}
+
+	pentry = nvgpu_pd_cache_get_partial(cache, bytes);
+	if (!pentry)
+		err = nvgpu_pd_cache_alloc_new(g, cache, pd, bytes);
+	else
+		err = nvgpu_pd_cache_alloc_from_partial(g, cache, pentry, pd);
+
+	if (err)
+		pd_dbg(g, "PD-Alloc [C] Failed!");
+
+	return err;
+}
+
+/*
+ * Allocate the DMA memory for a page directory. This handles the necessary PD
+ * cache logistics. Since on Parker and later GPUs some of the page directories
+ * are smaller than a page, packing these PDs together saves a lot of memory.
+ */
+int __nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	int err;
+
+	/*
+	 * Simple case: the PD is at least a page in size so just do a regular
+	 * DMA alloc.
+	 */
+	if (bytes >= PAGE_SIZE) {
+		err = __nvgpu_pd_cache_alloc_direct(g, pd, bytes);
+		if (err)
+			return err;
+
+		return 0;
+	}
+
+	if (WARN_ON(!g->mm.pd_cache))
+		return -ENOMEM;
+
+	nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
+	err = nvgpu_pd_cache_alloc(g, g->mm.pd_cache, pd, bytes);
+	nvgpu_mutex_release(&g->mm.pd_cache->lock);
+
+	return err;
+}
+
+void __nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
+{
+	pd_dbg(g, "PD-Free [D] 0x%p", pd->mem);
+
+	if (!pd->mem)
+		return;
+
+	nvgpu_dma_free(g, pd->mem);
+	nvgpu_kfree(g, pd->mem);
+	pd->mem = NULL;
+}
+
+static void nvgpu_pd_cache_free_mem_entry(struct gk20a *g,
+					  struct nvgpu_pd_cache *cache,
+					  struct nvgpu_pd_mem_entry *pentry)
+{
+	nvgpu_dma_free(g, &pentry->mem);
+	nvgpu_list_del(&pentry->list_entry);
+	nvgpu_rbtree_unlink(&pentry->tree_entry, &cache->mem_tree);
+	nvgpu_kfree(g, pentry);
+}
+
+static void nvgpu_pd_cache_do_free(struct gk20a *g,
+				   struct nvgpu_pd_cache *cache,
+				   struct nvgpu_pd_mem_entry *pentry,
+				   struct nvgpu_gmmu_pd *pd)
+{
+	u32 index = pd->mem_offs / pentry->pd_size;
+	u32 bit = 1 << index;
+
+	/* Mark entry as free. */
+	pentry->alloc_map &= ~bit;
+
+	if (pentry->alloc_map & nvgpu_pd_cache_get_mask(pentry)) {
+		/*
+		 * Partially full still. If it was already on the partial list
+		 * this just re-adds it.
+		 */
+		nvgpu_list_del(&pentry->list_entry);
+		nvgpu_list_add(&pentry->list_entry,
+			       &cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]);
+	} else {
+		/* Empty now so free it. */
+		nvgpu_pd_cache_free_mem_entry(g, cache, pentry);
+	}
+}
+
+static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_look_up(
+	struct gk20a *g,
+	struct nvgpu_pd_cache *cache,
+	struct nvgpu_gmmu_pd *pd)
+{
+	struct nvgpu_rbtree_node *node;
+
+	nvgpu_rbtree_search((u64)(uintptr_t)pd->mem, &node,
+			    cache->mem_tree);
+	if (!node)
+		return NULL;
+
+	return nvgpu_pd_mem_entry_from_tree_entry(node);
+}
+
+static void nvgpu_pd_cache_free(struct gk20a *g, struct nvgpu_pd_cache *cache,
+				struct nvgpu_gmmu_pd *pd)
+{
+	struct nvgpu_pd_mem_entry *pentry;
+
+	pd_dbg(g, "PD-Free [C] 0x%p", pd->mem);
+
+	pentry = nvgpu_pd_cache_look_up(g, cache, pd);
+	if (!pentry) {
+		WARN(1, "Attempting to free non-existent pd");
+		return;
+	}
+
+	nvgpu_pd_cache_do_free(g, cache, pentry, pd);
+}
+
+void __nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	/*
+	 * Simple case: just DMA free.
+	 */
+	if (!pd->cached)
+		return __nvgpu_pd_cache_free_direct(g, pd);
+
+	nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
+	nvgpu_pd_cache_free(g, g->mm.pd_cache, pd);
+	nvgpu_mutex_release(&g->mm.pd_cache->lock);
+}
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3aeba500..3ed3c7fe 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -35,21 +35,42 @@ int vm_aspace_id(struct vm_gk20a *vm)
 	return vm->as_share ? vm->as_share->id : -1;
 }
 
-static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
-				  struct nvgpu_gmmu_pd *parent,
-				  int level)
+static void __nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				    struct nvgpu_gmmu_pd *pd,
+				    int level)
 {
 	int i;
 
-	if (parent->entries)
-		for (i = 0; i < parent->num_entries; i++)
-			nvgpu_vm_free_entries(vm, &parent->entries[i],
+	if (pd->mem) {
+		__nvgpu_pd_free(vm, pd);
+		pd->mem = NULL;
+	}
+
+	if (pd->entries) {
+		for (i = 0; i < pd->num_entries; i++)
+			__nvgpu_vm_free_entries(vm, &pd->entries[i],
 					      level + 1);
+		nvgpu_vfree(vm->mm->g, pd->entries);
+		pd->entries = NULL;
+	}
+}
+
+static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				  struct nvgpu_gmmu_pd *pdb)
+{
+	struct gk20a *g = vm->mm->g;
+	int i;
+
+	__nvgpu_pd_cache_free_direct(g, pdb);
+
+	if (!pdb->entries)
+		return;
+
+	for (i = 0; i < pdb->num_entries; i++)
+		__nvgpu_vm_free_entries(vm, &pdb->entries[i], 1);
 
-	if (parent->mem.size)
-		nvgpu_free_gmmu_pages(vm, parent);
-	nvgpu_vfree(vm->mm->g, parent->entries);
-	parent->entries = NULL;
+	nvgpu_vfree(g, pdb->entries);
+	pdb->entries = NULL;
 }
 
 u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
@@ -110,7 +131,7 @@ void nvgpu_vm_mapping_batch_finish_locked(
 
 	if (mapping_batch->need_tlb_invalidate) {
 		struct gk20a *g = gk20a_from_vm(vm);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	}
 }
 
@@ -407,9 +428,8 @@ clean_up_allocators:
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 clean_up_page_tables:
-	/* Cleans up nvgpu_vm_init_page_tables() */
-	nvgpu_vfree(g, vm->pdb.entries);
-	nvgpu_free_gmmu_pages(vm, &vm->pdb);
+	/* Cleans up nvgpu_gmmu_init_page_table() */
+	__nvgpu_pd_cache_free_direct(g, &vm->pdb);
 clean_up_vgpu_vm:
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
@@ -525,7 +545,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 
-	nvgpu_vm_free_entries(vm, &vm->pdb, 0);
+	nvgpu_vm_free_entries(vm, &vm->pdb);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
-- 
cgit v1.2.2
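
The sketch below is not part of the patch; it is a self-contained illustration of the packing arithmetic described in the commit message and the pd_cache DOC comment. The names (pd_slab, slab_alloc, EXAMPLE_PAGE_SIZE, and so on) are invented for the example; only the 4 KB page / 256 B PD numbers and the bitmap logic mirror what nvgpu_pd_cache_get_mask(), nvgpu_pd_cache_alloc_from_partial() and nvgpu_pd_cache_do_free() do with pd_size, alloc_map and mem_offs.

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096u

/*
 * One "slab" is one DMA page carved into equal, power-of-two PD slots.
 * alloc_map has one bit per slot, analogous to alloc_map in the patch's
 * nvgpu_pd_mem_entry.
 */
struct pd_slab {
	unsigned int pd_size;   /* bytes per PD; power of 2, less than a page */
	unsigned int alloc_map; /* bit i set => slot i is in use */
};

/* Mask with one bit per slot: 16 slots of 256 B in a 4 KB page => 0xffff. */
static unsigned int slab_full_mask(const struct pd_slab *s)
{
	return (1u << (EXAMPLE_PAGE_SIZE / s->pd_size)) - 1u;
}

/*
 * Find the first clear bit (the driver uses ffz() for this), mark it used,
 * and return the byte offset of that PD within the page (the mem_offs idea).
 */
static int slab_alloc(struct pd_slab *s, unsigned int *offset)
{
	unsigned int mask = slab_full_mask(s);
	unsigned int bit;

	for (bit = 0; (mask >> bit) & 1u; bit++) {
		if (!(s->alloc_map & (1u << bit))) {
			s->alloc_map |= 1u << bit;
			*offset = bit * s->pd_size;
			return 0;
		}
	}
	return -1; /* slab is full; the driver would keep it on a "full" list */
}

int main(void)
{
	struct pd_slab s = { .pd_size = 256, .alloc_map = 0 };
	unsigned int offs;

	while (slab_alloc(&s, &offs) == 0)
		printf("allocated PD at offset %u\n", offs);
	printf("slab full: alloc_map=0x%x (mask=0x%x)\n",
	       s.alloc_map, slab_full_mask(&s));
	return 0;
}

Running this prints offsets 0, 256, ..., 3840 and then reports the slab as full with alloc_map equal to the mask, which is the same full-versus-partial condition the patch uses when deciding which nvgpu_pd_cache list an nvgpu_pd_mem_entry belongs on.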