summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
diff options
context:
space:
mode:
author: Peter Daifuku <pdaifuku@nvidia.com> 2020-08-20 21:45:26 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2020-09-15 05:38:45 -0400
commit: 036e000a17425e0569990f2aacae91b273392153 (patch)
tree: 5fb6845c56652a6bf97f70411846c891cdfbfab3 /drivers/gpu/nvgpu/include/nvgpu/gmmu.h
parent: 1c34f50227e9f308491758482d88c3c2f6605ffb (diff)
nvgpu: add PD cache support for page-sized PTEs
Large buffers being mapped to GMMU end up needing many pages for the PTE tables. Allocating these pages one by one can end up being a performance bottleneck, particularly in the virtualized case.

Add support for page-sized PTEs to the existing PD cache:
- Define NVGPU_PD_CACHE_SIZE, the allocation size for a new slab for the PD cache, effectively set to 64K bytes
- Use the PD cache for any allocation < NVGPU_PD_CACHE_SIZE
- When freeing up cached entries, avoid prefetch errors by invalidating the entry (memset to 0)

Bug 3093183
Bug 3100907

Change-Id: I2302a1dfeb056b9461159121bbae1be70524a357
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2401783
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Satish Arora <satisha@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/include/nvgpu/gmmu.h')
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/gmmu.h 23
1 files changed, 17 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 8e1eeedc..ee2207b8 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -28,6 +28,7 @@
28#include <nvgpu/list.h> 28#include <nvgpu/list.h>
29#include <nvgpu/rbtree.h> 29#include <nvgpu/rbtree.h>
30#include <nvgpu/lock.h> 30#include <nvgpu/lock.h>
31#include <nvgpu/bitops.h>
31 32
32/* 33/*
33 * This is the GMMU API visible to blocks outside of the GMMU. Basically this 34 * This is the GMMU API visible to blocks outside of the GMMU. Basically this
@@ -56,18 +57,25 @@ enum gk20a_mem_rw_flag {
56 */ 57 */
57#define NVGPU_PD_CACHE_MIN 256U 58#define NVGPU_PD_CACHE_MIN 256U
58#define NVGPU_PD_CACHE_MIN_SHIFT 9U 59#define NVGPU_PD_CACHE_MIN_SHIFT 9U
59#define NVGPU_PD_CACHE_COUNT 4U 60#define NVGPU_PD_CACHE_COUNT 8U
61#define NVGPU_PD_CACHE_SIZE (NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))
60 62
61struct nvgpu_pd_mem_entry { 63struct nvgpu_pd_mem_entry {
62 struct nvgpu_mem mem; 64 struct nvgpu_mem mem;
63 65
64 /* 66 /*
65 * Size of the page directories (not the mem). bmap is a bitmap showing 67 * Size of the page directories (not the mem). alloc_map is a bitmap
66 * which PDs have been allocated. The size of mem will always be one 68 * showing which PDs have been allocated.
67 * page. pd_size will always be a power of 2. 69 *
70 * The size of mem will be NVGPU_PD_CACHE_SIZE
71 * and pd_size will always be a power of 2.
72 *
68 */ 73 */
69 u32 pd_size; 74 u32 pd_size;
70 unsigned long alloc_map; 75 DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN);
76
77 /* Total number of allocations in this PD. */
78 u32 allocs;
71 79
72 struct nvgpu_list_node list_entry; 80 struct nvgpu_list_node list_entry;
73 struct nvgpu_rbtree_node tree_entry; 81 struct nvgpu_rbtree_node tree_entry;
@@ -251,7 +259,10 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
251 struct nvgpu_mem *mem, 259 struct nvgpu_mem *mem,
252 u64 gpu_va); 260 u64 gpu_va);
253 261
254int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes); 262int nvgpu_pd_alloc(struct vm_gk20a *vm,
263 struct nvgpu_gmmu_pd *pd,
264 u32 bytes);
265
255void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd); 266void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
256int nvgpu_pd_cache_alloc_direct(struct gk20a *g, 267int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
257 struct nvgpu_gmmu_pd *pd, u32 bytes); 268 struct nvgpu_gmmu_pd *pd, u32 bytes);