From 68dbfedd4f837d1bf2bab128500140f5d8cfadac Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Thu, 26 Oct 2017 15:00:47 -0700
Subject: gpu: nvgpu: fix pte location functions

Modify the recursive loop in pte_find to make sure it is targeting the proper
pde page size.

JIRA NVGPUGV100-36

Change-Id: Ib3673d8d9f1bd3c907d532f9e2562ecdc5dda4af
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586739
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/mm/gmmu.c     |  5 +++
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c     | 30 +++++++++++--
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h     |  4 ++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c     | 77 +++++++++++++++++++++++++++++++---
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h |  5 +++
 5 files changed, 112 insertions(+), 9 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 1eed3a3b..d6aaf8cd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -828,6 +828,11 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 		if (!pd_next->mem)
 			return -EINVAL;
 
+		attrs->pgsz = l->get_pgsz(g, pd, pd_idx);
+
+		if (attrs->pgsz >= gmmu_nr_page_sizes)
+			return -EINVAL;
+
 		return __nvgpu_locate_pte(g, vm, pd_next,
 					  vaddr, lvl + 1, attrs,
 					  data, pd_out, pd_idx_out,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 62f19039..cb0c015e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -287,15 +287,35 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * Args unused: big and small page sizes are the same, report small
+	 */
+	return gmmu_page_size_small;
+}
+
+enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * Args unused: always return invalid (gmmu_nr_page_sizes)
+	 */
+	return gmmu_nr_page_sizes;
+}
+
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {26, 26},
 	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {25, 25},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
@@ -303,11 +323,13 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {27, 27},
 	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {26, 26},
 	 .lo_bit = {12, 17},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 183d6211..2478ee1f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -183,4 +183,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
+enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx);
+enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 #endif /* MM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index dc746153..9bfb290a 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -319,27 +319,94 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+#define GP10B_PDE0_ENTRY_SIZE 16
+
+/*
+ * Calculate the pgsz of the PDE0 entry at index pd_idx of pd.
+ * Pascal+ implements a 5 level page table structure with only the last
+ * level having a different number of entries depending on whether it holds
+ * big pages or small pages.
+ * Returns gmmu_nr_page_sizes (invalid) if pd has no memory, if neither the
+ * small nor the big side has both aperture and address set, or if both do.
+ */
+static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/* Word offset of entry pd_idx, not of the first entry in pd */
+	u32 pde_base = pd->mem_offs / sizeof(u32) +
+		       pd_idx * (GP10B_PDE0_ENTRY_SIZE >> 2);
+	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
+	u32 i;
+	enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;
+
+	if (!pd->mem)
+		return pgsz;
+
+	nvgpu_mem_begin(g, pd->mem);
+	for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
+		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
+	nvgpu_mem_end(g, pd->mem);
+
+	/* Small side (words 2-3): the aperture mask needs a bitwise OR */
+	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_small_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[2] &
+			gmmu_new_dual_pde_address_small_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_shift_v()) |
+			((u64) pde_v[3] << 32);
+
+		if (addr)
+			pgsz = gmmu_page_size_small;
+	}
+
+	/* Big side (words 0-1) */
+	if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_big_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[0] &
+			gmmu_new_dual_pde_address_big_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_big_shift_v()) |
+			((u64) pde_v[1] << 32);
+
+		if (addr) {
+			/* Both sides set: PDE is invalid, maybe corrupted */
+			if (pgsz == gmmu_page_size_small) {
+				nvgpu_err(g,
+					"both small and big apertures enabled");
+				return gmmu_nr_page_sizes;
+			}
+			pgsz = gmmu_page_size_big;
+		}
+	}
+
+	return pgsz;
+}
+
 static const struct gk20a_mmu_level gp10b_mm_levels[] = {
 	{.hi_bit = {48, 48},
 	 .lo_bit = {47, 47},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {46, 46},
 	 .lo_bit = {38, 38},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {37, 37},
 	 .lo_bit = {29, 29},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {28, 28},
 	 .lo_bit = {21, 21},
 	 .update_entry = update_gmmu_pde0_locked,
-	 .entry_size = 16},
+	 .entry_size = GP10B_PDE0_ENTRY_SIZE,
+	 .get_pgsz = gp10b_get_pde0_pgsz},
 	{.hi_bit = {20, 20},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 7242fd67..ca07e359 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -200,6 +200,11 @@ struct gk20a_mmu_level {
 			     u64 virt_addr,
 			     struct nvgpu_gmmu_attrs *attrs);
 	u32 entry_size;
+	/*
+	 * Get pde page size
+	 */
+	enum gmmu_pgsz_gk20a (*get_pgsz)(struct gk20a *g,
+					 struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 };
 
 static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
-- 
cgit v1.2.2