diff options
author | David Nieto <dmartineznie@nvidia.com> | 2017-10-26 18:00:47 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-11-01 12:36:07 -0400 |
commit | 68dbfedd4f837d1bf2bab128500140f5d8cfadac (patch) | |
tree | e670938934170f955819a0a7b5ed73d2d2a63401 | |
parent | e9b77d72491fdf09e0ff54e4d5cf365861981162 (diff) |
gpu: nvgpu: fix pte location functions
Modify the recursive page table walk in __nvgpu_locate_pte() so that, at each
level, it descends using the page size actually programmed in that level's PDE.
JIRA NVGPUGV100-36
Change-Id: Ib3673d8d9f1bd3c907d532f9e2562ecdc5dda4af
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586739
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 30 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 77 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/gmmu.h | 5 |
5 files changed, 112 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 1eed3a3b..d6aaf8cd 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -828,6 +828,11 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm, | |||
828 | if (!pd_next->mem) | 828 | if (!pd_next->mem) |
829 | return -EINVAL; | 829 | return -EINVAL; |
830 | 830 | ||
831 | attrs->pgsz = l->get_pgsz(g, pd, pd_idx); | ||
832 | |||
833 | if (attrs->pgsz >= gmmu_nr_page_sizes) | ||
834 | return -EINVAL; | ||
835 | |||
831 | return __nvgpu_locate_pte(g, vm, pd_next, | 836 | return __nvgpu_locate_pte(g, vm, pd_next, |
832 | vaddr, lvl + 1, attrs, | 837 | vaddr, lvl + 1, attrs, |
833 | data, pd_out, pd_idx_out, | 838 | data, pd_out, pd_idx_out, |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 62f19039..cb0c015e 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -287,15 +287,35 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
287 | pd_write(g, pd, pd_offset + 1, pte_w[1]); | 287 | pd_write(g, pd, pd_offset + 1, pte_w[1]); |
288 | } | 288 | } |
289 | 289 | ||
290 | enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g, | ||
291 | struct nvgpu_gmmu_pd *pd, u32 pd_idx) | ||
292 | { | ||
293 | /* | ||
294 | * big and small page sizes are the same | ||
295 | */ | ||
296 | return gmmu_page_size_small; | ||
297 | } | ||
298 | |||
299 | enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g, | ||
300 | struct nvgpu_gmmu_pd *pd, u32 pd_idx) | ||
301 | { | ||
302 | /* | ||
303 | * return invalid | ||
304 | */ | ||
305 | return gmmu_nr_page_sizes; | ||
306 | } | ||
307 | |||
290 | const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { | 308 | const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { |
291 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, | 309 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, |
292 | .lo_bit = {26, 26}, | 310 | .lo_bit = {26, 26}, |
293 | .update_entry = update_gmmu_pde_locked, | 311 | .update_entry = update_gmmu_pde_locked, |
294 | .entry_size = 8}, | 312 | .entry_size = 8, |
313 | .get_pgsz = gk20a_get_pde_pgsz}, | ||
295 | {.hi_bit = {25, 25}, | 314 | {.hi_bit = {25, 25}, |
296 | .lo_bit = {12, 16}, | 315 | .lo_bit = {12, 16}, |
297 | .update_entry = update_gmmu_pte_locked, | 316 | .update_entry = update_gmmu_pte_locked, |
298 | .entry_size = 8}, | 317 | .entry_size = 8, |
318 | .get_pgsz = gk20a_get_pte_pgsz}, | ||
299 | {.update_entry = NULL} | 319 | {.update_entry = NULL} |
300 | }; | 320 | }; |
301 | 321 | ||
@@ -303,11 +323,13 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { | |||
303 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, | 323 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, |
304 | .lo_bit = {27, 27}, | 324 | .lo_bit = {27, 27}, |
305 | .update_entry = update_gmmu_pde_locked, | 325 | .update_entry = update_gmmu_pde_locked, |
306 | .entry_size = 8}, | 326 | .entry_size = 8, |
327 | .get_pgsz = gk20a_get_pde_pgsz}, | ||
307 | {.hi_bit = {26, 26}, | 328 | {.hi_bit = {26, 26}, |
308 | .lo_bit = {12, 17}, | 329 | .lo_bit = {12, 17}, |
309 | .update_entry = update_gmmu_pte_locked, | 330 | .update_entry = update_gmmu_pte_locked, |
310 | .entry_size = 8}, | 331 | .entry_size = 8, |
332 | .get_pgsz = gk20a_get_pte_pgsz}, | ||
311 | {.update_entry = NULL} | 333 | {.update_entry = NULL} |
312 | }; | 334 | }; |
313 | 335 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 183d6211..2478ee1f 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -183,4 +183,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem, | |||
183 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; | 183 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; |
184 | extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; | 184 | extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; |
185 | 185 | ||
186 | enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g, | ||
187 | struct nvgpu_gmmu_pd *pd, u32 pd_idx); | ||
188 | enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g, | ||
189 | struct nvgpu_gmmu_pd *pd, u32 pd_idx); | ||
186 | #endif /* MM_GK20A_H */ | 190 | #endif /* MM_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index dc746153..9bfb290a 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -319,27 +319,94 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
319 | pd_write(g, pd, pd_offset + 1, pte_w[1]); | 319 | pd_write(g, pd, pd_offset + 1, pte_w[1]); |
320 | } | 320 | } |
321 | 321 | ||
322 | #define GP10B_PDE0_ENTRY_SIZE 16 | ||
323 | |||
324 | /* | ||
325 | * Calculate the pgsz of the pde level | ||
326 | * Pascal+ implements a 5 level page table structure with only the last | ||
327 | * level having a different number of entries depending on whether it holds | ||
328 | * big pages or small pages. | ||
329 | */ | ||
330 | static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g, | ||
331 | struct nvgpu_gmmu_pd *pd, u32 pd_idx) | ||
332 | { | ||
333 | u32 pde_base = pd->mem_offs / sizeof(u32); | ||
334 | u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2]; | ||
335 | u32 i; | ||
336 | enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes; | ||
337 | |||
338 | if (!pd->mem) | ||
339 | return pgsz; | ||
340 | |||
341 | nvgpu_mem_begin(g, pd->mem); | ||
342 | for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++) | ||
343 | pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i); | ||
344 | nvgpu_mem_end(g, pd->mem); | ||
345 | |||
346 | /* | ||
347 | * Check if the aperture AND address are set | ||
348 | */ | ||
349 | if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() || | ||
350 | gmmu_new_dual_pde_aperture_small_video_memory_f())) { | ||
351 | u64 addr = ((u64) (pde_v[2] & | ||
352 | gmmu_new_dual_pde_address_small_sys_f(~0)) << | ||
353 | gmmu_new_dual_pde_address_shift_v()) | | ||
354 | ((u64) pde_v[3] << 32); | ||
355 | |||
356 | if (addr) | ||
357 | pgsz = gmmu_page_size_small; | ||
358 | } | ||
359 | |||
360 | if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() | | ||
361 | gmmu_new_dual_pde_aperture_big_video_memory_f())) { | ||
362 | u64 addr = ((u64) (pde_v[0] & | ||
363 | gmmu_new_dual_pde_address_big_sys_f(~0)) << | ||
364 | gmmu_new_dual_pde_address_big_shift_v()) | | ||
365 | ((u64) pde_v[1] << 32); | ||
366 | if (addr) { | ||
367 | /* | ||
368 | * If small is set that means that somehow MM allowed | ||
369 | * both small and big to be set, the PDE is not valid | ||
370 | * and may be corrupted | ||
371 | */ | ||
372 | if (pgsz == gmmu_page_size_small) { | ||
373 | nvgpu_err(g, | ||
374 | "both small and big apertures enabled"); | ||
375 | return gmmu_nr_page_sizes; | ||
376 | } | ||
377 | } | ||
378 | pgsz = gmmu_page_size_big; | ||
379 | } | ||
380 | |||
381 | return pgsz; | ||
382 | } | ||
383 | |||
322 | static const struct gk20a_mmu_level gp10b_mm_levels[] = { | 384 | static const struct gk20a_mmu_level gp10b_mm_levels[] = { |
323 | {.hi_bit = {48, 48}, | 385 | {.hi_bit = {48, 48}, |
324 | .lo_bit = {47, 47}, | 386 | .lo_bit = {47, 47}, |
325 | .update_entry = update_gmmu_pde3_locked, | 387 | .update_entry = update_gmmu_pde3_locked, |
326 | .entry_size = 8}, | 388 | .entry_size = 8, |
389 | .get_pgsz = gk20a_get_pde_pgsz}, | ||
327 | {.hi_bit = {46, 46}, | 390 | {.hi_bit = {46, 46}, |
328 | .lo_bit = {38, 38}, | 391 | .lo_bit = {38, 38}, |
329 | .update_entry = update_gmmu_pde3_locked, | 392 | .update_entry = update_gmmu_pde3_locked, |
330 | .entry_size = 8}, | 393 | .entry_size = 8, |
394 | .get_pgsz = gk20a_get_pde_pgsz}, | ||
331 | {.hi_bit = {37, 37}, | 395 | {.hi_bit = {37, 37}, |
332 | .lo_bit = {29, 29}, | 396 | .lo_bit = {29, 29}, |
333 | .update_entry = update_gmmu_pde3_locked, | 397 | .update_entry = update_gmmu_pde3_locked, |
334 | .entry_size = 8}, | 398 | .entry_size = 8, |
399 | .get_pgsz = gk20a_get_pde_pgsz}, | ||
335 | {.hi_bit = {28, 28}, | 400 | {.hi_bit = {28, 28}, |
336 | .lo_bit = {21, 21}, | 401 | .lo_bit = {21, 21}, |
337 | .update_entry = update_gmmu_pde0_locked, | 402 | .update_entry = update_gmmu_pde0_locked, |
338 | .entry_size = 16}, | 403 | .entry_size = GP10B_PDE0_ENTRY_SIZE, |
404 | .get_pgsz = gp10b_get_pde0_pgsz}, | ||
339 | {.hi_bit = {20, 20}, | 405 | {.hi_bit = {20, 20}, |
340 | .lo_bit = {12, 16}, | 406 | .lo_bit = {12, 16}, |
341 | .update_entry = update_gmmu_pte_locked, | 407 | .update_entry = update_gmmu_pte_locked, |
342 | .entry_size = 8}, | 408 | .entry_size = 8, |
409 | .get_pgsz = gk20a_get_pte_pgsz}, | ||
343 | {.update_entry = NULL} | 410 | {.update_entry = NULL} |
344 | }; | 411 | }; |
345 | 412 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index 7242fd67..ca07e359 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h | |||
@@ -200,6 +200,11 @@ struct gk20a_mmu_level { | |||
200 | u64 virt_addr, | 200 | u64 virt_addr, |
201 | struct nvgpu_gmmu_attrs *attrs); | 201 | struct nvgpu_gmmu_attrs *attrs); |
202 | u32 entry_size; | 202 | u32 entry_size; |
203 | /* | ||
204 | * Get pde page size | ||
205 | */ | ||
206 | enum gmmu_pgsz_gk20a (*get_pgsz)(struct gk20a *g, | ||
207 | struct nvgpu_gmmu_pd *pd, u32 pd_idx); | ||
203 | }; | 208 | }; |
204 | 209 | ||
205 | static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p) | 210 | static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p) |