author    David Nieto <dmartineznie@nvidia.com>    2017-11-30 14:05:14 -0500
committer Deepak Bhosale <dbhosale@nvidia.com>     2017-12-05 13:25:37 -0500
commit    1b125d8cbe05766c0cfb0ba9cac2bf46ffef7c3a (patch)
tree      8566528813e08666431abea7a0063816a7b15156
parent    fa2a3e2d3dd780d409e7673e7b47a34fd1cafb4f (diff)
gpu: nvgpu: fix indexing in locate pte function
The current code does not properly calculate the indexes within the PDE
to access the proper entry, and it has a bug in the assignment of the
big page entries.

This change fixes the issue by:

(1) Passing a pointer to the level structure and dereferencing the
    index offset to the next level.
(2) Changing the format of the address.
(3) Ensuring big pages are only selected if their address is set.

Bug 200364599

Change-Id: I46e32560ee341d8cfc08c077282dcb5549d2a140
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1610562
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Deepak Bhosale <dbhosale@nvidia.com>
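To make points (2) and (3) concrete, here is a minimal, self-contained sketch
of the corrected address handling. It is illustrative only: the field mask,
shift value and word layout below are simplified assumptions for this example,
not the nvgpu hardware definitions.

/* Illustrative sketch only: the field mask, shift and word layout are assumed. */
#include <stdint.h>
#include <stdio.h>

#define ADDR_FIELD_MASK 0x00ffffffu  /* assumed address field in the low word */
#define ADDR_SHIFT      12u          /* assumed address alignment shift */

/*
 * Combine the high word and the low-word address field first, then shift the
 * whole value, as the corrected expression in the patch does.  Shifting only
 * the low word and OR-ing in an unshifted high word (the old format) leaves
 * the upper address bits in the wrong position.
 */
static uint64_t pde_address(uint32_t lo, uint32_t hi)
{
        return (((uint64_t)hi << 32) | (lo & ADDR_FIELD_MASK)) << ADDR_SHIFT;
}

int main(void)
{
        uint32_t lo = 0x00001234;    /* low address word of one sub-entry */
        uint32_t hi = 0x00000001;    /* high address word */
        uint64_t addr = pde_address(lo, hi);

        /*
         * Point (3) of the fix: only a non-zero reconstructed address should
         * allow the corresponding (big or small) page size to be selected.
         */
        if (addr)
                printf("address set: 0x%llx\n", (unsigned long long)addr);
        return 0;
}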
-rw-r--r--   drivers/gpu/nvgpu/common/mm/gmmu.c        2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.c        2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.h        2
-rw-r--r--   drivers/gpu/nvgpu/gp10b/mm_gp10b.c        23
-rw-r--r--   drivers/gpu/nvgpu/include/nvgpu/gmmu.h    1
5 files changed, 18 insertions, 12 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 62c84e96..22f73083 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -838,7 +838,7 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
         if (!pd_next->mem)
                 return -EINVAL;

-        attrs->pgsz = l->get_pgsz(g, pd, pd_idx);
+        attrs->pgsz = l->get_pgsz(g, l, pd, pd_idx);

         if (attrs->pgsz >= gmmu_nr_page_sizes)
                 return -EINVAL;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3635bfc2..0b383a83 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -287,6 +287,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 }

 enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+                const struct gk20a_mmu_level *l,
                 struct nvgpu_gmmu_pd *pd, u32 pd_idx)
 {
         /*
@@ -296,6 +297,7 @@ enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
 }

 enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+                const struct gk20a_mmu_level *l,
                 struct nvgpu_gmmu_pd *pd, u32 pd_idx)
 {
         /*
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2478ee1f..ee0c2a07 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -184,7 +184,9 @@ extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];

 enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+                const struct gk20a_mmu_level *l,
                 struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+                const struct gk20a_mmu_level *l,
                 struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 #endif /* MM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index bb52aeb0..8cefbd3e 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -328,9 +328,11 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
  * big pages or small pages.
  */
 static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
+                const struct gk20a_mmu_level *l,
                 struct nvgpu_gmmu_pd *pd, u32 pd_idx)
 {
         u32 pde_base = pd->mem_offs / sizeof(u32);
+        u32 pde_offset = pde_base + pd_offset_from_index(l, pd_idx);
         u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
         u32 i;
         enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;
@@ -340,18 +342,17 @@ static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,

         nvgpu_mem_begin(g, pd->mem);
         for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
-                pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
+                pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_offset + i);
         nvgpu_mem_end(g, pd->mem);

         /*
          * Check if the aperture AND address are set
          */
-        if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() ||
+        if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
                         gmmu_new_dual_pde_aperture_small_video_memory_f())) {
-                u64 addr = ((u64) (pde_v[2] &
-                        gmmu_new_dual_pde_address_small_sys_f(~0)) <<
-                        gmmu_new_dual_pde_address_shift_v()) |
-                        ((u64) pde_v[3] << 32);
+                u64 addr = (((u64) pde_v[3] << 32) | (u64) (pde_v[2] &
+                        gmmu_new_dual_pde_address_small_sys_f(~0))) <<
+                        gmmu_new_dual_pde_address_shift_v();

                 if (addr)
                         pgsz = gmmu_page_size_small;
@@ -359,10 +360,10 @@ static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,

         if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
                         gmmu_new_dual_pde_aperture_big_video_memory_f())) {
-                u64 addr = ((u64) (pde_v[0] &
-                        gmmu_new_dual_pde_address_big_sys_f(~0)) <<
-                        gmmu_new_dual_pde_address_big_shift_v()) |
-                        ((u64) pde_v[1] << 32);
+                u64 addr = (((u64) pde_v[1] << 32) | (u64) (pde_v[0] &
+                        gmmu_new_dual_pde_address_big_sys_f(~0))) <<
+                        gmmu_new_dual_pde_address_big_shift_v();
+
                 if (addr) {
                         /*
                          * If small is set that means that somehow MM allowed
@@ -374,8 +375,8 @@ static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
374 "both small and big apertures enabled"); 375 "both small and big apertures enabled");
375 return gmmu_nr_page_sizes; 376 return gmmu_nr_page_sizes;
376 } 377 }
378 pgsz = gmmu_page_size_big;
377 } 379 }
378 pgsz = gmmu_page_size_big;
379 } 380 }
380 381
381 return pgsz; 382 return pgsz;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index ca07e359..ade94df9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -204,6 +204,7 @@ struct gk20a_mmu_level {
          * Get pde page size
          */
         enum gmmu_pgsz_gk20a (*get_pgsz)(struct gk20a *g,
+                        const struct gk20a_mmu_level *l,
                         struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 };

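For context on the signature change above (point (1) of the commit message),
the following is a minimal sketch of why the level pointer has to reach the
callback: the per-entry offset depends on the entry size recorded in the level
description, so without l the callback cannot index past entry 0. The struct
layout, entry_size field and helper names here are simplified stand-ins, not
the nvgpu definitions.

/* Sketch only: the struct layout and entry_size field are assumptions. */
#include <stdint.h>
#include <stdio.h>

struct page_dir {
        const uint32_t *mem;      /* PD backing memory, viewed as 32-bit words */
        uint32_t mem_offs;        /* byte offset of this PD in its backing mem */
};

struct mmu_level {
        uint32_t entry_size;      /* bytes per directory entry at this level */
        uint32_t (*get_pgsz)(const struct mmu_level *l,
                             const struct page_dir *pd, uint32_t pd_idx);
};

/* Word offset of entry pd_idx within the PD, derived from the level. */
static uint32_t pd_offset_from_index(const struct mmu_level *l, uint32_t pd_idx)
{
        return (pd_idx * l->entry_size) / (uint32_t)sizeof(uint32_t);
}

static uint32_t pde0_get_pgsz(const struct mmu_level *l,
                              const struct page_dir *pd, uint32_t pd_idx)
{
        uint32_t base = pd->mem_offs / (uint32_t)sizeof(uint32_t);
        uint32_t off  = base + pd_offset_from_index(l, pd_idx);

        /* Placeholder decode: real code inspects the aperture/address fields. */
        return pd->mem[off];
}

int main(void)
{
        uint32_t words[8] = { 0, 0, 0, 0, 7, 0, 0, 0 }; /* two 16-byte entries */
        struct page_dir pd = { .mem = words, .mem_offs = 0 };
        struct mmu_level lvl = { .entry_size = 16, .get_pgsz = pde0_get_pgsz };

        /* Entry 1 is reachable only because the level's entry size is known. */
        printf("entry 1 first word: %u\n", (unsigned)lvl.get_pgsz(&lvl, &pd, 1));
        return 0;
}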