nvgpu: add PD cache support for page-sized PTEs

Large buffers being mapped to GMMU end up needing many pages for the PTE tables. Allocating these pages one by one can end up being a performance bottleneck, particularly in the virtualized case.

Add support for page-sized PTEs to the existing PD cache:
- define NVGPU_PD_CACHE_SIZE, the allocation size for a new slab for the PD cache, effectively set to 64K bytes
- Use the PD cache for any allocation < NVGPU_PD_CACHE_SIZE
- When freeing up cached entries, avoid prefetch errors by invalidating the entry (memset to 0)

Bug 3093183
Bug 3100907

Change-Id: I2302a1dfeb056b9461159121bbae1be70524a357
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2401783
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Satish Arora <satisha@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Peter Daifuku <pdaifuku@nvidia.com> 2020-08-20 21:45:26 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2020-09-15 05:38:45 -0400
commit: 036e000a17425e0569990f2aacae91b273392153 (patch)
tree: 5fb6845c56652a6bf97f70411846c891cdfbfab3 /drivers/gpu/nvgpu/include/nvgpu/gmmu.h
parent: 1c34f50227e9f308491758482d88c3c2f6605ffb (diff)
1 files changed, 17 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 8e1eeedc..ee2207b8 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -28,6 +28,7 @@
 #include <nvgpu/list.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/lock.h>
+#include <nvgpu/bitops.h>
 /*
 * This is the GMMU API visible to blocks outside of the GMMU. Basically this
@@ -56,18 +57,25 @@ enum gk20a_mem_rw_flag {
 */
 #define NVGPU_PD_CACHE_MIN              256U
 #define NVGPU_PD_CACHE_MIN_SHIFT        9U
-#define NVGPU_PD_CACHE_COUNT            4U
+#define NVGPU_PD_CACHE_COUNT            8U
+#define NVGPU_PD_CACHE_SIZE             (NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))
 struct nvgpu_pd_mem_entry {
        struct nvgpu_mem                mem;
        /*
-         * Size of the page directories (not the mem). bmap is a bitmap showing
+         * Size of the page directories (not the mem). alloc_map is a bitmap
-         * which PDs have been allocated. The size of mem will always be one
+         * showing which PDs have been allocated.
-         * page. pd_size will always be a power of 2.
+         *
+         * The size of mem will be NVGPU_PD_CACHE_SIZE
+         * and pd_size will always be a power of 2.
+         *
         */
        u32                             pd_size;
-        unsigned long                   alloc_map;
+        DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN);
+        /* Total number of allocations in this PD. */
+        u32                             allocs;
        struct nvgpu_list_node          list_entry;
        struct nvgpu_rbtree_node        tree_entry;
@@ -251,7 +259,10 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
                      struct nvgpu_mem *mem,
                      u64 gpu_va);
-int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
+int nvgpu_pd_alloc(struct vm_gk20a *vm,
+                   struct nvgpu_gmmu_pd *pd,
+                   u32 bytes);
 void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
 int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
                                  struct nvgpu_gmmu_pd *pd, u32 bytes);
author	Peter Daifuku <pdaifuku@nvidia.com>	2020-08-20 21:45:26 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2020-09-15 05:38:45 -0400
commit	036e000a17425e0569990f2aacae91b273392153 (patch)
tree	5fb6845c56652a6bf97f70411846c891cdfbfab3 /drivers/gpu/nvgpu/include/nvgpu/gmmu.h
parent	1c34f50227e9f308491758482d88c3c2f6605ffb (diff)