author:    David Nieto <dmartineznie@nvidia.com>  2017-10-26 18:00:47 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com>  2017-11-01 12:36:07 -0400
commit:    68dbfedd4f837d1bf2bab128500140f5d8cfadac
tree:      e670938934170f955819a0a7b5ed73d2d2a63401
parent:    e9b77d72491fdf09e0ff54e4d5cf365861981162
gpu: nvgpu: fix pte location functions
Modify the recursive loop in pte_find to make sure it is targeting the
proper pde page size.

JIRA NVGPUGV100-36

Change-Id: Ib3673d8d9f1bd3c907d532f9e2562ecdc5dda4af
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586739
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
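The change centers on a per-level get_pgsz callback that the recursive walker consults before descending, so that attrs->pgsz always reflects the page size the current PDE actually addresses. The following is a minimal, self-contained sketch of that pattern only; level_desc, walk_level and the PGSZ_* names are illustrative stand-ins, not the nvgpu API, which uses struct gk20a_mmu_level, __nvgpu_locate_pte and enum gmmu_pgsz_gk20a as shown in the diffs below.

/*
 * Sketch of the per-level page-size callback pattern introduced by this
 * patch.  Names (level_desc, walk_level, PGSZ_*) are illustrative
 * placeholders, not the nvgpu types used in the real diff below.
 */
#include <stdio.h>

enum pgsz { PGSZ_SMALL, PGSZ_BIG, PGSZ_NR /* out of range == invalid */ };

struct pd { int dummy; };		/* stand-in page directory node */

struct level_desc {
	/* Reports the page size implied by entry pd_idx at this level. */
	enum pgsz (*get_pgsz)(const struct pd *pd, unsigned int pd_idx);
};

/* Levels whose children all use one size just return a constant. */
static enum pgsz fixed_small_pgsz(const struct pd *pd, unsigned int pd_idx)
{
	(void)pd; (void)pd_idx;
	return PGSZ_SMALL;
}

/*
 * Before recursing, the walker asks the current level for the page size
 * and bails out if the answer is out of range -- the same guard the
 * gmmu.c hunk below adds around attrs->pgsz.
 */
static int walk_level(const struct level_desc *levels, const struct pd *pd,
		      unsigned int lvl, unsigned int pd_idx, enum pgsz *out)
{
	enum pgsz pgsz = levels[lvl].get_pgsz(pd, pd_idx);

	if (pgsz >= PGSZ_NR)
		return -1;		/* corrupt or unexpected entry */

	*out = pgsz;
	/* ...descend into the child directory with the updated size... */
	return 0;
}

int main(void)
{
	struct level_desc levels[] = { { .get_pgsz = fixed_small_pgsz } };
	struct pd pd = { 0 };
	enum pgsz pgsz;

	if (walk_level(levels, &pd, 0, 0, &pgsz) == 0)
		printf("resolved page size index: %d\n", (int)pgsz);
	return 0;
}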
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c      |  5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c      | 30
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |  4
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c      | 77
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/gmmu.h  |  5
5 files changed, 112 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 1eed3a3b..d6aaf8cd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -828,6 +828,11 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 	if (!pd_next->mem)
 		return -EINVAL;
 
+	attrs->pgsz = l->get_pgsz(g, pd, pd_idx);
+
+	if (attrs->pgsz >= gmmu_nr_page_sizes)
+		return -EINVAL;
+
 	return __nvgpu_locate_pte(g, vm, pd_next,
 				  vaddr, lvl + 1, attrs,
 				  data, pd_out, pd_idx_out,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 62f19039..cb0c015e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -287,15 +287,35 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * big and small page sizes are the same
+	 */
+	return gmmu_page_size_small;
+}
+
+enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * return invalid
+	 */
+	return gmmu_nr_page_sizes;
+}
+
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {26, 26},
 	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {25, 25},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
@@ -303,11 +323,13 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {27, 27},
 	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {26, 26},
 	 .lo_bit = {12, 17},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 183d6211..2478ee1f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -183,4 +183,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
+enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
+enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 #endif /* MM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index dc746153..9bfb290a 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -319,27 +319,94 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+#define GP10B_PDE0_ENTRY_SIZE 16
+
+/*
+ * Calculate the pgsz of the pde level
+ * Pascal+ implements a 5 level page table structure with only the last
+ * level having a different number of entries depending on whether it holds
+ * big pages or small pages.
+ */
+static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
+				struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	u32 pde_base = pd->mem_offs / sizeof(u32);
+	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
+	u32 i;
+	enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;
+
+	if (!pd->mem)
+		return pgsz;
+
+	nvgpu_mem_begin(g, pd->mem);
+	for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
+		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
+	nvgpu_mem_end(g, pd->mem);
+
+	/*
+	 * Check if the aperture AND address are set
+	 */
+	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_small_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[2] &
+			gmmu_new_dual_pde_address_small_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_shift_v()) |
+			((u64) pde_v[3] << 32);
+
+		if (addr)
+			pgsz = gmmu_page_size_small;
+	}
+
+	if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_big_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[0] &
+			gmmu_new_dual_pde_address_big_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_big_shift_v()) |
+			((u64) pde_v[1] << 32);
+		if (addr) {
+			/*
+			 * If small is set that means that somehow MM allowed
+			 * both small and big to be set, the PDE is not valid
+			 * and may be corrupted
+			 */
+			if (pgsz == gmmu_page_size_small) {
+				nvgpu_err(g,
+					"both small and big apertures enabled");
+				return gmmu_nr_page_sizes;
+			}
+		}
+		pgsz = gmmu_page_size_big;
+	}
+
+	return pgsz;
+}
+
 static const struct gk20a_mmu_level gp10b_mm_levels[] = {
 	{.hi_bit = {48, 48},
 	 .lo_bit = {47, 47},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {46, 46},
 	 .lo_bit = {38, 38},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {37, 37},
 	 .lo_bit = {29, 29},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {28, 28},
 	 .lo_bit = {21, 21},
 	 .update_entry = update_gmmu_pde0_locked,
-	 .entry_size = 16},
+	 .entry_size = GP10B_PDE0_ENTRY_SIZE,
+	 .get_pgsz = gp10b_get_pde0_pgsz},
 	{.hi_bit = {20, 20},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
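gp10b_get_pde0_pgsz above only treats an aperture as mapping a page table when the corresponding address is also non-zero, and it rebuilds the 64-bit address from the two 32-bit words of the dual PDE. A stand-alone sketch of that decode pattern follows; APERTURE_MASK, ADDR_MASK and ADDR_SHIFT are illustrative constants, not the Pascal PDE layout, which the driver reads through the gmmu_new_dual_pde_* accessors used in the hunk above.

/*
 * Sketch of the dual-PDE decode used above: a (lo, hi) word pair maps a
 * page table only if its aperture field is set and the reassembled
 * address is non-zero.  The field constants below are illustrative, not
 * the Pascal hardware layout.
 */
#include <stdint.h>
#include <stdio.h>

#define APERTURE_MASK	0x3u		/* illustrative aperture field  */
#define ADDR_MASK	0xffffff00u	/* illustrative address field   */
#define ADDR_SHIFT	8		/* illustrative address shift   */

/* Returns 1 and fills *addr if this entry points at a page table. */
static int pde_target_valid(uint32_t lo, uint32_t hi, uint64_t *addr)
{
	if (!(lo & APERTURE_MASK))
		return 0;		/* aperture not set: entry unused */

	*addr = ((uint64_t)(lo & ADDR_MASK) << ADDR_SHIFT) |
		((uint64_t)hi << 32);
	return *addr != 0;		/* aperture AND address must be set */
}

int main(void)
{
	uint64_t addr;

	/* Example words: aperture bits set, address field non-zero. */
	if (pde_target_valid(0x00001201u, 0x00000002u, &addr))
		printf("page table at 0x%llx\n", (unsigned long long)addr);
	return 0;
}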
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 7242fd67..ca07e359 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -200,6 +200,11 @@ struct gk20a_mmu_level {
 			    u64 virt_addr,
 			    struct nvgpu_gmmu_attrs *attrs);
 	u32 entry_size;
+	/*
+	 * Get pde page size
+	 */
+	enum gmmu_pgsz_gk20a (*get_pgsz)(struct gk20a *g,
+				struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 };
 
 static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)