summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
diff options
context:
space:
mode:
author:    Alex Waterman <alexw@nvidia.com>  2017-06-09 14:42:50 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com>  2017-07-06 17:44:16 -0400
commit:    583704620db88e391f6b14acc57af859a70127de (patch)
tree:      8fc3becf2850b724e87011b0e0250c52d0efb7ee /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent:    c1393d5b68e63c992f4c689cb788139fdf8c2f1a (diff)
gpu: nvgpu: Implement PD packing
In some cases page directories require less than a full page of memory. For example, on Pascal, the final PD level for large pages is only 256 bytes; thus 16 PDs can fit in a single page. To allocate an entire page for each of these 256 B PDs is extremely wasteful. This patch aims to alleviate the wasted DMA memory from having small PDs in a full page by packing multiple small PDs into a single page. The packing is implemented as a slab allocator - each page is a slab and from each page multiple PD instances can be allocated. Several modifications to the nvgpu_gmmu_pd struct also needed to be made to support this. The nvgpu_mem is now a pointer and there's an explicit offset into the nvgpu_mem struct so that each nvgpu_gmmu_pd knows what portion of the memory it's using. The nvgpu_pde_phys_addr() function and the pd_write() functions also require some changes since the PD no longer is always situated at the start of the nvgpu_mem. Initialization and cleanup of the page tables for each VM was slightly modified to work through the new pd_cache implementation. Some PDs (i.e the PDB), despite not being a full page, still require a full page for alignment purposes (HW requirements). Thus a direct allocation method for PDs is still provided. This is also used when a PD that could in principle be cached is greater than a page in size. Lastly a new debug flag was added for the pd_cache code. JIRA NVGPU-30 Change-Id: I64c8037fc356783c1ef203cc143c4d71bbd5d77c Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master/r/1506610 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 9
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 558a1b06..0a84cabb 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -478,6 +478,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
478 478
479 gk20a_semaphore_sea_destroy(g); 479 gk20a_semaphore_sea_destroy(g);
480 gk20a_vidmem_destroy(g); 480 gk20a_vidmem_destroy(g);
481 nvgpu_pd_cache_fini(g);
481} 482}
482 483
483static int gk20a_alloc_sysmem_flush(struct gk20a *g) 484static int gk20a_alloc_sysmem_flush(struct gk20a *g)
@@ -1560,7 +1561,7 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
1560 struct nvgpu_gmmu_pd *pd, u64 addr) 1561 struct nvgpu_gmmu_pd *pd, u64 addr)
1561{ 1562{
1562 u32 pde0_bits = 1563 u32 pde0_bits =
1563 nvgpu_aperture_mask(g, &pd->mem, 1564 nvgpu_aperture_mask(g, pd->mem,
1564 gmmu_pde_aperture_big_sys_mem_ncoh_f(), 1565 gmmu_pde_aperture_big_sys_mem_ncoh_f(),
1565 gmmu_pde_aperture_big_video_memory_f()) | 1566 gmmu_pde_aperture_big_video_memory_f()) |
1566 gmmu_pde_address_big_sys_f( 1567 gmmu_pde_address_big_sys_f(
@@ -1573,7 +1574,7 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g,
1573 struct nvgpu_gmmu_pd *pd, u64 addr) 1574 struct nvgpu_gmmu_pd *pd, u64 addr)
1574{ 1575{
1575 u32 pde1_bits = 1576 u32 pde1_bits =
1576 nvgpu_aperture_mask(g, &pd->mem, 1577 nvgpu_aperture_mask(g, pd->mem,
1577 gmmu_pde_aperture_small_sys_mem_ncoh_f(), 1578 gmmu_pde_aperture_small_sys_mem_ncoh_f(),
1578 gmmu_pde_aperture_small_video_memory_f()) | 1579 gmmu_pde_aperture_small_video_memory_f()) |
1579 gmmu_pde_vol_small_true_f() | /* tbd: why? */ 1580 gmmu_pde_vol_small_true_f() | /* tbd: why? */
@@ -2173,14 +2174,14 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm)
2173void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, 2174void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
2174 struct vm_gk20a *vm) 2175 struct vm_gk20a *vm)
2175{ 2176{
2176 u64 pdb_addr = nvgpu_mem_get_base_addr(g, &vm->pdb.mem, 0); 2177 u64 pdb_addr = nvgpu_mem_get_base_addr(g, vm->pdb.mem, 0);
2177 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); 2178 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
2178 u32 pdb_addr_hi = u64_hi32(pdb_addr); 2179 u32 pdb_addr_hi = u64_hi32(pdb_addr);
2179 2180
2180 gk20a_dbg_info("pde pa=0x%llx", pdb_addr); 2181 gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
2181 2182
2182 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), 2183 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
2183 nvgpu_aperture_mask(g, &vm->pdb.mem, 2184 nvgpu_aperture_mask(g, vm->pdb.mem,
2184 ram_in_page_dir_base_target_sys_mem_ncoh_f(), 2185 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
2185 ram_in_page_dir_base_target_vid_mem_f()) | 2186 ram_in_page_dir_base_target_vid_mem_f()) |
2186 ram_in_page_dir_base_vol_true_f() | 2187 ram_in_page_dir_base_vol_true_f() |