diff options
author | Alex Waterman <alexw@nvidia.com> | 2017-06-09 14:42:50 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-07-06 17:44:16 -0400 |
commit | 583704620db88e391f6b14acc57af859a70127de (patch) | |
tree | 8fc3becf2850b724e87011b0e0250c52d0efb7ee /drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |
parent | c1393d5b68e63c992f4c689cb788139fdf8c2f1a (diff) |
gpu: nvgpu: Implement PD packing
In some cases page directories require less than a full page of memory.
For example, on Pascal, the final PD level for large pages is only 256 bytes;
thus 16 PDs can fit in a single page. To allocate an entire page for each of
these 256 B PDs is extremely wasteful. This patch aims to alleviate the
wasted DMA memory from having small PDs in a full page by packing multiple
small PDs into a single page.
The packing is implemented as a slab allocator - each page is a slab and
from each page multiple PD instances can be allocated. Several modifications
to the nvgpu_gmmu_pd struct also needed to be made to support this. The
nvgpu_mem is now a pointer and there's an explicit offset into the nvgpu_mem
struct so that each nvgpu_gmmu_pd knows what portion of the memory it's
using.
The nvgpu_pde_phys_addr() function and the pd_write() functions also require
some changes since the PD no longer is always situated at the start of the
nvgpu_mem.
Initialization and cleanup of the page tables for each VM was slightly
modified to work through the new pd_cache implementation. Some PDs (i.e.
the PDB), despite not being a full page, still require a full page for
alignment purposes (HW requirements). Thus a direct allocation method for
PDs is still provided. This is also used when a PD that could in principle
be cached is greater than a page in size.
Lastly a new debug flag was added for the pd_cache code.
JIRA NVGPU-30
Change-Id: I64c8037fc356783c1ef203cc143c4d71bbd5d77c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master/r/1506610
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 |
1 file changed, 4 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index a245d0e0..cadcffa4 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <nvgpu/rbtree.h> | 31 | #include <nvgpu/rbtree.h> |
32 | #include <nvgpu/kref.h> | 32 | #include <nvgpu/kref.h> |
33 | 33 | ||
34 | struct nvgpu_pd_cache; | ||
35 | |||
34 | #ifdef CONFIG_ARM64 | 36 | #ifdef CONFIG_ARM64 |
35 | #define outer_flush_range(a, b) | 37 | #define outer_flush_range(a, b) |
36 | #define __cpuc_flush_dcache_area __flush_dcache_area | 38 | #define __cpuc_flush_dcache_area __flush_dcache_area |
@@ -217,6 +219,8 @@ struct mm_gk20a { | |||
217 | struct vm_gk20a *vm; | 219 | struct vm_gk20a *vm; |
218 | } ce; | 220 | } ce; |
219 | 221 | ||
222 | struct nvgpu_pd_cache *pd_cache; | ||
223 | |||
220 | struct nvgpu_mutex l2_op_lock; | 224 | struct nvgpu_mutex l2_op_lock; |
221 | struct nvgpu_mutex tlb_lock; | 225 | struct nvgpu_mutex tlb_lock; |
222 | struct nvgpu_mutex priv_lock; | 226 | struct nvgpu_mutex priv_lock; |