author:    David Nieto <dmartineznie@nvidia.com>  2017-10-26 18:00:47 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com>  2017-11-01 12:36:07 -0400
commit:    68dbfedd4f837d1bf2bab128500140f5d8cfadac
tree:      e670938934170f955819a0a7b5ed73d2d2a63401
parent:    e9b77d72491fdf09e0ff54e4d5cf365861981162
gpu: nvgpu: fix pte location functions
Modify the recursive loop in pte_find to make sure it is targeting the
proper pde page size.

JIRA NVGPUGV100-36

Change-Id: Ib3673d8d9f1bd3c907d532f9e2562ecdc5dda4af
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586739
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
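The change centers on a per-level get_pgsz callback that the recursive walker consults before descending, so that attrs->pgsz always reflects the page size the current PDE actually addresses. The following is a minimal, self-contained sketch of that pattern only; level_desc, walk_level and the PGSZ_* names are illustrative stand-ins, not the nvgpu API, which uses struct gk20a_mmu_level, __nvgpu_locate_pte and enum gmmu_pgsz_gk20a as shown in the diffs below.

/*
 * Sketch of the per-level page-size callback pattern introduced by this
 * patch.  Names (level_desc, walk_level, PGSZ_*) are illustrative
 * placeholders, not the nvgpu types used in the real diff below.
 */
#include <stdio.h>

enum pgsz { PGSZ_SMALL, PGSZ_BIG, PGSZ_NR /* out of range == invalid */ };

struct pd { int dummy; };		/* stand-in page directory node */

struct level_desc {
	/* Reports the page size implied by entry pd_idx at this level. */
	enum pgsz (*get_pgsz)(const struct pd *pd, unsigned int pd_idx);
};

/* Levels whose children all use one size just return a constant. */
static enum pgsz fixed_small_pgsz(const struct pd *pd, unsigned int pd_idx)
{
	(void)pd; (void)pd_idx;
	return PGSZ_SMALL;
}

/*
 * Before recursing, the walker asks the current level for the page size
 * and bails out if the answer is out of range -- the same guard the
 * gmmu.c hunk below adds around attrs->pgsz.
 */
static int walk_level(const struct level_desc *levels, const struct pd *pd,
		      unsigned int lvl, unsigned int pd_idx, enum pgsz *out)
{
	enum pgsz pgsz = levels[lvl].get_pgsz(pd, pd_idx);

	if (pgsz >= PGSZ_NR)
		return -1;		/* corrupt or unexpected entry */

	*out = pgsz;
	/* ...descend into the child directory with the updated size... */
	return 0;
}

int main(void)
{
	struct level_desc levels[] = { { .get_pgsz = fixed_small_pgsz } };
	struct pd pd = { 0 };
	enum pgsz pgsz;

	if (walk_level(levels, &pd, 0, 0, &pgsz) == 0)
		printf("resolved page size index: %d\n", (int)pgsz);
	return 0;
}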
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c      |  5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c      | 30
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |  4
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c      | 77
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/gmmu.h  |  5
5 files changed, 112 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 1eed3a3b..d6aaf8cd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -828,6 +828,11 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 	if (!pd_next->mem)
 		return -EINVAL;
 
+	attrs->pgsz = l->get_pgsz(g, pd, pd_idx);
+
+	if (attrs->pgsz >= gmmu_nr_page_sizes)
+		return -EINVAL;
+
 	return __nvgpu_locate_pte(g, vm, pd_next,
 				  vaddr, lvl + 1, attrs,
 				  data, pd_out, pd_idx_out,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 62f19039..cb0c015e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -287,15 +287,35 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * big and small page sizes are the same
+	 */
+	return gmmu_page_size_small;
+}
+
+enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+					struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * return invalid
+	 */
+	return gmmu_nr_page_sizes;
+}
+
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {26, 26},
 	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {25, 25},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
@@ -303,11 +323,13 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {27, 27},
 	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {26, 26},
 	 .lo_bit = {12, 17},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 183d6211..2478ee1f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -183,4 +183,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
+enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
+enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 #endif /* MM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index dc746153..9bfb290a 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -319,27 +319,94 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+#define GP10B_PDE0_ENTRY_SIZE 16
+
+/*
+ * Calculate the pgsz of the pde level
+ * Pascal+ implements a 5 level page table structure with only the last
+ * level having a different number of entries depending on whether it holds
+ * big pages or small pages.
+ */
+static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
+				struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	u32 pde_base = pd->mem_offs / sizeof(u32);
+	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
+	u32 i;
+	enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;
+
+	if (!pd->mem)
+		return pgsz;
+
+	nvgpu_mem_begin(g, pd->mem);
+	for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
+		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
+	nvgpu_mem_end(g, pd->mem);
+
+	/*
+	 * Check if the aperture AND address are set
+	 */
+	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_small_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[2] &
+			gmmu_new_dual_pde_address_small_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_shift_v()) |
+			((u64) pde_v[3] << 32);
+
+		if (addr)
+			pgsz = gmmu_page_size_small;
+	}
+
+	if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_big_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[0] &
+			gmmu_new_dual_pde_address_big_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_big_shift_v()) |
+			((u64) pde_v[1] << 32);
+		if (addr) {
+			/*
+			 * If small is set that means that somehow MM allowed
+			 * both small and big to be set, the PDE is not valid
+			 * and may be corrupted
+			 */
+			if (pgsz == gmmu_page_size_small) {
+				nvgpu_err(g,
+					"both small and big apertures enabled");
+				return gmmu_nr_page_sizes;
+			}
+		}
+		pgsz = gmmu_page_size_big;
+	}
+
+	return pgsz;
+}
+
 static const struct gk20a_mmu_level gp10b_mm_levels[] = {
 	{.hi_bit = {48, 48},
 	 .lo_bit = {47, 47},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {46, 46},
 	 .lo_bit = {38, 38},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {37, 37},
 	 .lo_bit = {29, 29},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {28, 28},
 	 .lo_bit = {21, 21},
 	 .update_entry = update_gmmu_pde0_locked,
-	 .entry_size = 16},
+	 .entry_size = GP10B_PDE0_ENTRY_SIZE,
+	 .get_pgsz = gp10b_get_pde0_pgsz},
 	{.hi_bit = {20, 20},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
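gp10b_get_pde0_pgsz above only treats an aperture as mapping a page table when the corresponding address is also non-zero, and it rebuilds the 64-bit address from the two 32-bit words of the dual PDE. A stand-alone sketch of that decode pattern follows; APERTURE_MASK, ADDR_MASK and ADDR_SHIFT are illustrative constants, not the Pascal PDE layout, which the driver reads through the gmmu_new_dual_pde_* accessors used in the hunk above.

/*
 * Sketch of the dual-PDE decode used above: a (lo, hi) word pair maps a
 * page table only if its aperture field is set and the reassembled
 * address is non-zero.  The field constants below are illustrative, not
 * the Pascal hardware layout.
 */
#include <stdint.h>
#include <stdio.h>

#define APERTURE_MASK	0x3u		/* illustrative aperture field  */
#define ADDR_MASK	0xffffff00u	/* illustrative address field   */
#define ADDR_SHIFT	8		/* illustrative address shift   */

/* Returns 1 and fills *addr if this entry points at a page table. */
static int pde_target_valid(uint32_t lo, uint32_t hi, uint64_t *addr)
{
	if (!(lo & APERTURE_MASK))
		return 0;		/* aperture not set: entry unused */

	*addr = ((uint64_t)(lo & ADDR_MASK) << ADDR_SHIFT) |
		((uint64_t)hi << 32);
	return *addr != 0;		/* aperture AND address must be set */
}

int main(void)
{
	uint64_t addr;

	/* Example words: aperture bits set, address field non-zero. */
	if (pde_target_valid(0x00001201u, 0x00000002u, &addr))
		printf("page table at 0x%llx\n", (unsigned long long)addr);
	return 0;
}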
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 7242fd67..ca07e359 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -200,6 +200,11 @@ struct gk20a_mmu_level {
 			    u64 virt_addr,
 			    struct nvgpu_gmmu_attrs *attrs);
 	u32 entry_size;
+	/*
+	 * Get pde page size
+	 */
+	enum gmmu_pgsz_gk20a (*get_pgsz)(struct gk20a *g,
+				struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 };
 
 static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)