author	Peter Daifuku <pdaifuku@nvidia.com>	2020-09-30 14:25:05 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2020-10-06 13:10:02 -0400
commit	5a948ccca95bcecf9d1e81db02394134f8a18c38 (patch)
tree	fb9e43f6750d9c804e5eb8b161a1c634428f9914
parent	cd134bb198d7138a3c2fcb17d11f2eedf934e2c4 (diff)
gpu: nvgpu: limit PD cache to < pgsize for linux
For Linux, limit the use of the cache to entries less than the page
size, to avoid potential problems with running out of CMA memory when
allocating large, contiguous slabs, as would be required for
non-iommuable chips.

Also, in nvgpu_pd_cache_do_free(), zero out entries only if iommu is in
use and PTE entries use the cache (since it's the prefetch of invalid
PTEs by the iommu that needs to be avoided).

Bug 3093183
Bug 3100907

Change-Id: I363031db32e11bc705810a7e87fc9e9ac1dc00bd
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2422039
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Dinesh T <dt@nvidia.com>
Reviewed-by: Satish Arora <satisha@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
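To make the sizing concrete: NVGPU_PD_CACHE_SIZE is NVGPU_PD_CACHE_MIN * (1 << NVGPU_PD_CACHE_COUNT), so the previous unconditional COUNT of 8 meant 64 KiB contiguous slabs even on a 4 KiB-page kernel. The standalone sketch below (illustrative only; cache_size() is a hypothetical helper, not driver code) prints the slab size before and after the patch:

#include <stdio.h>

#define NVGPU_PD_CACHE_MIN 256U

/* Mirrors the NVGPU_PD_CACHE_SIZE formula from the patch. */
static unsigned int cache_size(unsigned int count)
{
	return NVGPU_PD_CACHE_MIN * (1U << count);
}

int main(void)
{
	/* Old: COUNT = 8 unconditionally -> 64 KiB contiguous slabs, which
	 * a non-iommuable chip must carve out of CMA on a 4 KiB-page kernel. */
	printf("old: COUNT=8 -> slab = %u bytes\n", cache_size(8U));

	/* New: COUNT = 4 on 4 KiB-page kernels -> a slab never exceeds one
	 * page, so no multi-page contiguous allocation is requested. */
	printf("new: COUNT=4 -> slab = %u bytes\n", cache_size(4U));
	return 0;
}

With COUNT = 4 the cache tops out at exactly one page (4096 bytes), so only sub-page directory entries are served from cached slabs; anything page-sized or larger falls through to the regular allocator.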
-rw-r--r--	drivers/gpu/nvgpu/common/mm/pd_cache.c	15
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/gmmu.h	19
2 files changed, 30 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/pd_cache.c b/drivers/gpu/nvgpu/common/mm/pd_cache.c
index a5b3d134..8f7003e5 100644
--- a/drivers/gpu/nvgpu/common/mm/pd_cache.c
+++ b/drivers/gpu/nvgpu/common/mm/pd_cache.c
@@ -423,12 +423,19 @@ static void nvgpu_pd_cache_do_free(struct gk20a *g,
 	 * this just re-adds it.
 	 *
 	 * Since the memory used for the entries is still mapped, if
-	 * igpu make sure the entries are invalidated so that the hw
-	 * doesn't accidentally try to prefetch non-existent fb memory.
+	 * iommu is being used, make sure PTE entries in particular
+	 * are invalidated so that the hw doesn't accidentally try to
+	 * prefetch non-existent fb memory.
 	 *
-	 * TBD: what about dgpu? (Not supported in Drive 5.0)
+	 * Notes:
+	 * - The check for NVGPU_PD_CACHE_SIZE > PAGE_SIZE effectively
+	 *   determines whether PTE entries use the cache.
+	 * - In the case where PTE entries use the cache, we also
+	 *   end up invalidating the PDE entries, but that's a minor
+	 *   performance hit, as there are far fewer of those
+	 *   typically than there are PTE entries.
 	 */
-	if (pd->mem->cpu_va != NULL) {
+	if (nvgpu_iommuable(g) && (NVGPU_PD_CACHE_SIZE > PAGE_SIZE)) {
 		memset((void *)((u64)pd->mem->cpu_va + pd->mem_offs), 0,
 		       pentry->pd_size);
 	}
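The new guard combines two independent conditions: invalidation only matters when an IOMMU might prefetch through the still-mapped memory, and only when the cache is big enough that PTE entries live in it (NVGPU_PD_CACHE_SIZE > PAGE_SIZE). A minimal compilable sketch of that decision, using stand-in types and values (none of this is driver code; nvgpu_iommuable() is re-declared here as a stub):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define PAGE_SIZE		4096U
#define NVGPU_PD_CACHE_SIZE	4096U	/* 4 KiB-page kernel value per the patch */

struct gk20a;				/* opaque, as in the driver */

/* Stub standing in for the driver's nvgpu_iommuable(). */
static bool nvgpu_iommuable(struct gk20a *g)
{
	(void)g;
	return true;	/* assume an IOMMU-backed chip */
}

/* Sketch of the new free path: scrub the freed entry only if the IOMMU
 * could prefetch stale PTEs through it. With the Linux values above,
 * NVGPU_PD_CACHE_SIZE == PAGE_SIZE, so the memset should be dead code. */
static void pd_cache_free_sketch(struct gk20a *g, uint8_t *cpu_va,
				 uint64_t mem_offs, size_t pd_size)
{
	if (nvgpu_iommuable(g) && (NVGPU_PD_CACHE_SIZE > PAGE_SIZE)) {
		memset(cpu_va + mem_offs, 0, pd_size);
	}
}

int main(void)
{
	uint8_t buf[64] = { 0xff };

	pd_cache_free_sketch(NULL, buf, 0, sizeof(buf));
	return 0;
}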
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index ee2207b8..2fc0d44e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -29,6 +29,7 @@
 #include <nvgpu/rbtree.h>
 #include <nvgpu/lock.h>
 #include <nvgpu/bitops.h>
+#include <nvgpu/mm.h>
 
 /*
  * This is the GMMU API visible to blocks outside of the GMMU. Basically this
@@ -54,10 +55,28 @@ enum gk20a_mem_rw_flag {
  * Minimum size of a cache. The number of different caches in the nvgpu_pd_cache
  * structure is of course depending on this. The MIN_SHIFT define is the right
  * number of bits to shift to determine which list to use in the array of lists.
+ *
+ * For Linux, limit the use of the cache to entries less than the page size, to
+ * avoid potential problems with running out of CMA memory when allocating large,
+ * contiguous slabs, as would be required for non-iommuable chips.
  */
 #define NVGPU_PD_CACHE_MIN		256U
 #define NVGPU_PD_CACHE_MIN_SHIFT	9U
+
+#ifdef __KERNEL__
+
+#if PAGE_SIZE == 4096
+#define NVGPU_PD_CACHE_COUNT		4U
+#elif PAGE_SIZE == 65536
 #define NVGPU_PD_CACHE_COUNT		8U
+#else
+#error "Unsupported page size."
+#endif
+
+#else
+#define NVGPU_PD_CACHE_COUNT		8U
+#endif
+
 #define NVGPU_PD_CACHE_SIZE	(NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))
 
 struct nvgpu_pd_mem_entry {
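Because NVGPU_PD_CACHE_COUNT is now derived from PAGE_SIZE, the invariant the patch relies on can be checked at build time. The following standalone re-statement of the header logic hard-codes PAGE_SIZE (the kernel normally provides it) and asserts the property with C11 _Static_assert:

/* Illustrative only: PAGE_SIZE is hand-supplied; the macros re-state the
 * patched gmmu.h logic rather than include it. */
#define PAGE_SIZE		4096

#define NVGPU_PD_CACHE_MIN	256U

#if PAGE_SIZE == 4096
#define NVGPU_PD_CACHE_COUNT	4U
#elif PAGE_SIZE == 65536
#define NVGPU_PD_CACHE_COUNT	8U
#else
#error "Unsupported page size."
#endif

#define NVGPU_PD_CACHE_SIZE	(NVGPU_PD_CACHE_MIN * (1U << NVGPU_PD_CACHE_COUNT))

/* The invariant behind the fix: no cached slab spans more than one page,
 * so the cache never needs a large contiguous (CMA) allocation. */
_Static_assert(NVGPU_PD_CACHE_SIZE <= PAGE_SIZE,
	       "PD cache slab must fit in a single page on Linux");

int main(void)
{
	return 0;
}

Changing PAGE_SIZE to 65536 exercises the other branch; any other value trips the #error, matching the header's behavior.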