summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
diff options
context:
space:
mode:
author    Alex Waterman <alexw@nvidia.com>  2017-06-09 14:42:50 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-07-06 17:44:16 -0400
commit    583704620db88e391f6b14acc57af859a70127de (patch)
tree      8fc3becf2850b724e87011b0e0250c52d0efb7ee /drivers/gpu/nvgpu/gk20a/mm_gk20a.h
parent    c1393d5b68e63c992f4c689cb788139fdf8c2f1a (diff)
gpu: nvgpu: Implement PD packing
In some cases page directories require less than a full page of memory. For example, on Pascal, the final PD level for large pages is only 256 bytes; thus 16 PDs can fit in a single page. To allocate an entire page for each of these 256 B PDs is extremely wasteful. This patch aims to alleviate the wasted DMA memory from having small PDs in a full page by packing multiple small PDs into a single page. The packing is implemented as a slab allocator - each page is a slab and from each page multiple PD instances can be allocated. Several modifications to the nvgpu_gmmu_pd struct also needed to be made to support this. The nvgpu_mem is now a pointer and there's an explicit offset into the nvgpu_mem struct so that each nvgpu_gmmu_pd knows what portion of the memory it's using. The nvgpu_pde_phys_addr() function and the pd_write() functions also require some changes since the PD no longer is always situated at the start of the nvgpu_mem. Initialization and cleanup of the page tables for each VM was slightly modified to work through the new pd_cache implementation. Some PDs (i.e the PDB), despite not being a full page, still require a full page for alignment purposes (HW requirements). Thus a direct allocation method for PDs is still provided. This is also used when a PD that could in principle be cached is greater than a page in size. Lastly a new debug flag was added for the pd_cache code. JIRA NVGPU-30 Change-Id: I64c8037fc356783c1ef203cc143c4d71bbd5d77c Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master/r/1506610 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4
1 file changed, 4 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index a245d0e0..cadcffa4 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -31,6 +31,8 @@
 #include <nvgpu/rbtree.h>
 #include <nvgpu/kref.h>
 
+struct nvgpu_pd_cache;
+
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
 #define __cpuc_flush_dcache_area __flush_dcache_area
@@ -217,6 +219,8 @@ struct mm_gk20a {
 		struct vm_gk20a *vm;
 	} ce;
 
+	struct nvgpu_pd_cache *pd_cache;
+
 	struct nvgpu_mutex l2_op_lock;
 	struct nvgpu_mutex tlb_lock;
 	struct nvgpu_mutex priv_lock;