From 68dbfedd4f837d1bf2bab128500140f5d8cfadac Mon Sep 17 00:00:00 2001
From: David Nieto
Date: Thu, 26 Oct 2017 15:00:47 -0700
Subject: gpu: nvgpu: fix pte location functions

Modify the recursive loop in pte_find to make sure it is targeting the
proper pde page size.

JIRA NVGPUGV100-36

Change-Id: Ib3673d8d9f1bd3c907d532f9e2562ecdc5dda4af
Signed-off-by: David Nieto
Reviewed-on: https://git-master.nvidia.com/r/1586739
Reviewed-by: Alex Waterman
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: svc-mobile-coverity
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
Review notes (below the '---' line, so not part of the commit message):
 - gp10b_get_pde0_pgsz: the small-aperture mask was combined with '||', which
   evaluates to 1 and so only tested bit 0 of pde_v[2]; it now uses '|', the
   same way the big-aperture mask is built.
 - gp10b_get_pde0_pgsz: pgsz is now set to big only when the big address is
   non-zero, mirroring the small half (aperture AND address must be set).
 - NOTE(review): gp10b_get_pde0_pgsz never uses pd_idx, so it always reads
   the entry located at pd->mem_offs; confirm whether the read should be
   offset by pd_idx * entry size.

 drivers/gpu/nvgpu/common/mm/gmmu.c     |  5 +++
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c     | 30 +++++++++++--
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h     |  4 ++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c     | 77 +++++++++++++++++++++++++++++++---
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h |  5 +++
 5 files changed, 112 insertions(+), 9 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 1eed3a3b..d6aaf8cd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -828,6 +828,11 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 		if (!pd_next->mem)
 			return -EINVAL;
 
+		attrs->pgsz = l->get_pgsz(g, pd, pd_idx);
+
+		if (attrs->pgsz >= gmmu_nr_page_sizes)
+			return -EINVAL;
+
 		return __nvgpu_locate_pte(g, vm, pd_next,
 					  vaddr, lvl + 1, attrs,
 					  data, pd_out, pd_idx_out,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 62f19039..cb0c015e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -287,15 +287,35 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * big and small page sizes are the same
+	 */
+	return gmmu_page_size_small;
+}
+
+enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * return invalid
+	 */
+	return gmmu_nr_page_sizes;
+}
+
 const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {26, 26},
 	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {25, 25},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
@@ -303,11 +323,13 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
 	 .lo_bit = {27, 27},
 	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {26, 26},
 	 .lo_bit = {12, 17},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 183d6211..2478ee1f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -183,4 +183,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
+enum gmmu_pgsz_gk20a gk20a_get_pde_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
+enum gmmu_pgsz_gk20a gk20a_get_pte_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 #endif /* MM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index dc746153..9bfb290a 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -319,27 +319,94 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
+#define GP10B_PDE0_ENTRY_SIZE 16
+
+/*
+ * Report which page size a dual PDE0 entry maps: small if only its small
+ * half has an aperture and a non-zero address, big if only its big half
+ * does, gmmu_nr_page_sizes (invalid) if neither or both do. Pascal+ uses a
+ * 5 level page table; only the last level's entry count depends on pgsz.
+ */
+static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	u32 pde_base = pd->mem_offs / sizeof(u32);
+	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
+	u32 i;
+	enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;
+
+	if (!pd->mem)
+		return pgsz;
+
+	nvgpu_mem_begin(g, pd->mem);
+	for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
+		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
+	nvgpu_mem_end(g, pd->mem);
+
+	/*
+	 * A half counts as set only if aperture AND address are both non-zero
+	 */
+	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_small_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[2] &
+			gmmu_new_dual_pde_address_small_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_shift_v()) |
+			((u64) pde_v[3] << 32);
+
+		if (addr)
+			pgsz = gmmu_page_size_small;
+	}
+
+	if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
+			gmmu_new_dual_pde_aperture_big_video_memory_f())) {
+		u64 addr = ((u64) (pde_v[0] &
+			gmmu_new_dual_pde_address_big_sys_f(~0)) <<
+			gmmu_new_dual_pde_address_big_shift_v()) |
+			((u64) pde_v[1] << 32);
+		if (addr) {
+			/*
+			 * If small is set that means that somehow MM allowed
+			 * both small and big to be set, the PDE is not valid
+			 * and may be corrupted
+			 */
+			if (pgsz == gmmu_page_size_small) {
+				nvgpu_err(g,
+					"both small and big apertures enabled");
+				return gmmu_nr_page_sizes;
+			}
+			pgsz = gmmu_page_size_big;
+		}
+	}
+
+	return pgsz;
+}
+
 static const struct gk20a_mmu_level gp10b_mm_levels[] = {
 	{.hi_bit = {48, 48},
 	 .lo_bit = {47, 47},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {46, 46},
 	 .lo_bit = {38, 38},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {37, 37},
 	 .lo_bit = {29, 29},
 	 .update_entry = update_gmmu_pde3_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
 	{.hi_bit = {28, 28},
 	 .lo_bit = {21, 21},
 	 .update_entry = update_gmmu_pde0_locked,
-	 .entry_size = 16},
+	 .entry_size = GP10B_PDE0_ENTRY_SIZE,
+	 .get_pgsz = gp10b_get_pde0_pgsz},
 	{.hi_bit = {20, 20},
 	 .lo_bit = {12, 16},
 	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8},
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
 	{.update_entry = NULL}
 };
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 7242fd67..ca07e359 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -200,6 +200,11 @@ struct gk20a_mmu_level {
 			    u64 virt_addr,
 			    struct nvgpu_gmmu_attrs *attrs);
 	u32 entry_size;
+	/*
+	 * Page size of pd's entry pd_idx; gmmu_nr_page_sizes means invalid
+	 */
+	enum gmmu_pgsz_gk20a (*get_pgsz)(struct gk20a *g,
+		struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 };
 
 static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
-- 
cgit v1.2.2