From 68dbfedd4f837d1bf2bab128500140f5d8cfadac Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Thu, 26 Oct 2017 15:00:47 -0700
Subject: gpu: nvgpu: fix pte location functions

Modify the recursive loop in pte_find to make sure it is targeting the proper
pde page size.

JIRA NVGPUGV100-36

Change-Id: Ib3673d8d9f1bd3c907d532f9e2562ecdc5dda4af
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586739
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 77 +++++++++++++++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/nvgpu/gp10b')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index dc746153..9bfb290a 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -319,27 +319,94 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+#define GP10B_PDE0_ENTRY_SIZE 16
+
+/*
+ * Calculate the pgsz of a PDE0 entry. Pascal+ implements a 5 level page
+ * table structure with only the last level having a different number of
+ * entries depending on whether it holds big pages or small pages.
+ * Returns gmmu_page_size_small/_big for the dual PDE half with aperture and
+ * address set; gmmu_nr_page_sizes if pd->mem is NULL or neither/both are.
+ * NOTE(review): pd_idx is unused; only the entry at pd->mem_offs is read.
+ */
+static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	u32 pde_base = pd->mem_offs / sizeof(u32);
+	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
+	u32 i;
+	enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;
+
+	if (!pd->mem)
+		return pgsz;
+
+	nvgpu_mem_begin(g, pd->mem);
+	for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
+		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
+	nvgpu_mem_end(g, pd->mem);
+
+	/* A half is valid only if its aperture AND address are both set */
+	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_small_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[2] &
+			gmmu_new_dual_pde_address_small_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_shift_v()) |
+			((u64) pde_v[3] << 32);
+
+		if (addr)
+			pgsz = gmmu_page_size_small;
+	}
+
+	if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_big_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[0] &
+			gmmu_new_dual_pde_address_big_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_big_shift_v()) |
+			((u64) pde_v[1] << 32);
+		if (addr) {
+			/*
+			 * If small is set that means that somehow MM allowed
+			 * both small and big to be set, the PDE is not valid
+			 * and may be corrupted
+			 */
+			if (pgsz == gmmu_page_size_small) {
+				nvgpu_err(g,
+					"both small and big apertures enabled");
+				return gmmu_nr_page_sizes;
+			}
+			pgsz = gmmu_page_size_big;
+		}
+	}
+
+	return pgsz;
+}
+
 static const struct gk20a_mmu_level gp10b_mm_levels[] = {
 	{.hi_bit = {48, 48},
 	 .lo_bit = {47, 47},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {46, 46},
 	 .lo_bit = {38, 38},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {37, 37},
 	 .lo_bit = {29, 29},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {28, 28},
 	 .lo_bit = {21, 21},
 	 .update_entry = update_gmmu_pde0_locked,
-	 .entry_size = 16},
+	 .entry_size = GP10B_PDE0_ENTRY_SIZE,
+	 .get_pgsz = gp10b_get_pde0_pgsz},
 	{.hi_bit = {20, 20},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
-- 
cgit v1.2.2