From 1b125d8cbe05766c0cfb0ba9cac2bf46ffef7c3a Mon Sep 17 00:00:00 2001
From: David Nieto
Date: Thu, 30 Nov 2017 11:05:14 -0800
Subject: gpu: nvgpu: fix indexing in locate pte function

The current code does not properly calculate the index within the PDE
needed to access the correct entry, and it has a bug in the assignment
of the big page entries.

This change fixes the issue by:

(1) Passing a pointer to the level structure and dereferencing the
    index offset to the next level.
(2) Changing the format of the address.
(3) Ensuring big pages are only selected if their address is set.

Bug 200364599

Change-Id: I46e32560ee341d8cfc08c077282dcb5549d2a140
Signed-off-by: David Nieto
Reviewed-on: https://git-master.nvidia.com/r/1610562
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Deepak Bhosale
---
 drivers/gpu/nvgpu/common/mm/gmmu.c     |  2 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c     |  2 ++
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h     |  2 ++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c     | 23 ++++++++++++-----------
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h |  1 +
 5 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 62c84e96..22f73083 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -838,7 +838,7 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 	if (!pd_next->mem)
 		return -EINVAL;

-	attrs->pgsz = l->get_pgsz(g, pd, pd_idx);
+	attrs->pgsz = l->get_pgsz(g, l, pd, pd_idx);

 	if (attrs->pgsz >= gmmu_nr_page_sizes)
 		return -EINVAL;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3635bfc2..0b383a83 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -287,6 +287,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 }

 enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+				const struct gk20a_mmu_level *l,
 				struct nvgpu_gmmu_pd *pd, u32 pd_idx)
 {
 	/*
@@ -296,6 +297,7 @@ enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
 }

 enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+				const struct gk20a_mmu_level *l,
 				struct nvgpu_gmmu_pd *pd, u32 pd_idx)
 {
 	/*
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2478ee1f..ee0c2a07 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -184,7 +184,9 @@ extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];

 enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+				const struct gk20a_mmu_level *l,
 				struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+				const struct gk20a_mmu_level *l,
 				struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 #endif /* MM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index bb52aeb0..8cefbd3e 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -328,9 +328,11 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
  * big pages or small pages.
  */
 static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
+				const struct gk20a_mmu_level *l,
 				struct nvgpu_gmmu_pd *pd, u32 pd_idx)
 {
 	u32 pde_base = pd->mem_offs / sizeof(u32);
+	u32 pde_offset = pde_base + pd_offset_from_index(l, pd_idx);
 	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
 	u32 i;
 	enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;
@@ -340,18 +342,17 @@ static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,

 	nvgpu_mem_begin(g, pd->mem);
 	for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
-		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
+		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_offset + i);
 	nvgpu_mem_end(g, pd->mem);

 	/*
 	 * Check if the aperture AND address are set
 	 */
-	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() ||
+	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
 			gmmu_new_dual_pde_aperture_small_video_memory_f())) {
-		u64 addr = ((u64) (pde_v[2] &
-			gmmu_new_dual_pde_address_small_sys_f(~0)) <<
-			gmmu_new_dual_pde_address_shift_v()) |
-			((u64) pde_v[3] << 32);
+		u64 addr = (((u64) pde_v[3] << 32) | (u64) (pde_v[2] &
+			gmmu_new_dual_pde_address_small_sys_f(~0))) <<
+			gmmu_new_dual_pde_address_shift_v();

 		if (addr)
 			pgsz = gmmu_page_size_small;
@@ -359,10 +360,10 @@ static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,

 	if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
 			gmmu_new_dual_pde_aperture_big_video_memory_f())) {
-		u64 addr = ((u64) (pde_v[0] &
-			gmmu_new_dual_pde_address_big_sys_f(~0)) <<
-			gmmu_new_dual_pde_address_big_shift_v()) |
-			((u64) pde_v[1] << 32);
+		u64 addr = (((u64) pde_v[1] << 32) | (u64) (pde_v[0] &
+			gmmu_new_dual_pde_address_big_sys_f(~0))) <<
+			gmmu_new_dual_pde_address_big_shift_v();
+
 		if (addr) {
 			/*
 			 * If small is set that means that somehow MM allowed
@@ -374,8 +375,8 @@ static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
 					"both small and big apertures enabled");
 				return gmmu_nr_page_sizes;
 			}
+			pgsz = gmmu_page_size_big;
 		}
-		pgsz = gmmu_page_size_big;
 	}

 	return pgsz;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index ca07e359..ade94df9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -204,6 +204,7 @@ struct gk20a_mmu_level {
 	 * Get pde page size
 	 */
 	enum gmmu_pgsz_gk20a (*get_pgsz)(struct gk20a *g,
+				const struct gk20a_mmu_level *l,
 				struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 };

--
cgit v1.2.2
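
For readers outside the nvgpu tree, the minimal standalone sketch below shows
the address-format problem behind item (2) of the fix: the old code shifted
the low-word address field first and then OR'd in the high PDE word, which
parks the high word at bit 32 instead of above the shifted field; the fixed
code concatenates the two words first and shifts the combined value. The mask
and shift used here are made-up stand-ins for the gmmu_new_dual_pde_* field
and shift accessors, and the helper names are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins only; the real values come from the
 * gmmu_new_dual_pde_* accessors in the nvgpu headers. */
#define ADDR_LO_MASK 0xffffff00u /* address bits held in the low PDE word */
#define ADDR_SHIFT   12          /* shift turning the field into a byte address */

/* Old reassembly: the low field is shifted first and the high word is OR'd
 * in afterwards, so the high word lands at bit 32 rather than at
 * bit (32 + ADDR_SHIFT). */
static uint64_t pde_addr_old(uint32_t lo, uint32_t hi)
{
	return ((uint64_t)(lo & ADDR_LO_MASK) << ADDR_SHIFT) |
	       ((uint64_t)hi << 32);
}

/* Fixed reassembly: concatenate {hi, low field} first, then shift the whole
 * 64-bit value, as the patch now does for both the small and big apertures. */
static uint64_t pde_addr_new(uint32_t lo, uint32_t hi)
{
	return (((uint64_t)hi << 32) | (uint64_t)(lo & ADDR_LO_MASK)) <<
	       ADDR_SHIFT;
}

int main(void)
{
	uint32_t lo = 0x00abcd00u; /* pretend low PDE word */
	uint32_t hi = 0x00000001u; /* pretend high PDE word */

	printf("old: 0x%016llx\n", (unsigned long long)pde_addr_old(lo, hi));
	printf("new: 0x%016llx\n", (unsigned long long)pde_addr_new(lo, hi));
	return 0;
}

The remaining changes are visible directly in the diff: the PDE words are now
read starting at pde_base + pd_offset_from_index(l, pd_idx) rather than at
entry zero, the aperture test uses a bitwise | of the aperture field values
instead of a logical || (which collapses to 1 and only ever tests bit 0), and
pgsz is set to gmmu_page_size_big only inside the if (addr) block.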