From 63714e7cc158d0574947c2171a81988ffece2a2a Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Tue, 23 Jun 2015 15:28:00 -0700
Subject: gpu: nvgpu: Implement priv pages

Implement support for privileged pages. Use them for kernel allocated
buffers.

Change-Id: I720fc441008077b8e2ed218a7a685b8aab2258f0
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/761919
---
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c       |  3 ++-
 drivers/gpu/nvgpu/gk20a/gk20a.h           |  1 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c        | 15 +++++++------
 drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h   | 12 +++++++++++
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c        | 36 ++++++++++++++++++++-----------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h        |  6 ++++--
 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c |  3 ++-
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c       |  4 ++--
 drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h   | 12 +++++++++++
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c          |  1 +
 10 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 7f212eca..d15b6e8a 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1126,7 +1126,8 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 	vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.mem.sgt,
 			       g->gr.compbit_store.mem.size,
 			       NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-			       gk20a_mem_flag_read_only);
+			       gk20a_mem_flag_read_only,
+			       false);
 
 	if (!vaddr) {
 		gk20a_warn(&cde_ctx->pdev->dev, "cde: cannot map compression bit backing store");
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 72f1178b..25712a64 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -350,6 +350,7 @@ struct gpu_ops {
 				int rw_flag,
 				bool clear_ctags,
 				bool sparse,
+				bool priv,
 				struct vm_gk20a_mapping_batch *batch);
 		void (*gmmu_unmap)(struct vm_gk20a *vm,
 				u64 vaddr,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index e232bf17..cf9cab0e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1748,7 +1748,8 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 					&ucode_info->surface_desc.sgt,
 					ucode_info->surface_desc.size,
 					0, /* flags */
-					gk20a_mem_flag_read_only);
+					gk20a_mem_flag_read_only,
+					false);
 	if (!ucode_info->surface_desc.gpu_va) {
 		gk20a_err(d, "failed to update gmmu ptes\n");
 		return -ENOMEM;
@@ -2375,7 +2376,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_none);
+				gk20a_mem_flag_none, true);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[CIRCULAR_VA] = gpu_va;
@@ -2392,7 +2393,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_none);
+				gk20a_mem_flag_none, false);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[ATTRIBUTE_VA] = gpu_va;
@@ -2409,7 +2410,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_none);
+				gk20a_mem_flag_none, true);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[PAGEPOOL_VA] = gpu_va;
@@ -2419,7 +2420,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	sgt = gr->global_ctx_buffer[GOLDEN_CTX].mem.sgt;
 	size = gr->global_ctx_buffer[GOLDEN_CTX].mem.size;
 	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
-				gk20a_mem_flag_none);
+				gk20a_mem_flag_none, true);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
@@ -2429,7 +2430,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	sgt = gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.sgt;
 	size = gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size;
 	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
-				gk20a_mem_flag_none);
+				gk20a_mem_flag_none, true);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
@@ -2501,7 +2502,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 	gr_ctx->mem.gpu_va = gk20a_gmmu_map(vm, &gr_ctx->mem.sgt,
 				gr_ctx->mem.size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_none);
+				gk20a_mem_flag_none, true);
 	if (!gr_ctx->mem.gpu_va)
 		goto err_free_mem;
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
index 45ae59d6..19e44382 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
@@ -146,6 +146,18 @@ static inline u32 gmmu_pte_valid_false_f(void)
 {
 	return 0x0;
 }
+static inline u32 gmmu_pte_privilege_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_pte_privilege_true_f(void)
+{
+	return 0x2;
+}
+static inline u32 gmmu_pte_privilege_false_f(void)
+{
+	return 0x0;
+}
 static inline u32 gmmu_pte_address_sys_f(u32 v)
 {
 	return (v & 0xfffffff) << 4;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index f3512f90..112e218a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -101,7 +101,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u8 kind_v, u32 ctag_offset,
 				   bool cacheable, bool umapped_pte,
 				   int rw_flag, bool sparse,
-				   u32 flags);
+				   bool priv);
 static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
@@ -1168,6 +1168,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			int rw_flag,
 			bool clear_ctags,
 			bool sparse,
+			bool priv,
 			struct vm_gk20a_mapping_batch *batch)
 {
 	int err = 0;
@@ -1208,7 +1209,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 				      NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE,
 				      rw_flag,
 				      sparse,
-				      flags);
+				      priv);
 		if (err) {
 			gk20a_err(d, "failed to update ptes on map");
 			goto fail_validate;
@@ -1559,6 +1560,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 					flags, rw_flag,
 					clear_ctags,
 					false,
+					false,
 					batch);
 	if (!map_offset)
 		goto clean_up;
@@ -1779,6 +1781,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 				gk20a_mem_flag_read_only,
 				false, /* clear_ctags */
 				false, /* sparse */
+				false, /* priv */
 				NULL); /* mapping_batch handle */
 
 		if (!mapped_buffer->ctag_map_win_addr) {
@@ -1802,7 +1805,8 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		   struct sg_table **sgt,
 		   u64 size,
 		   u32 flags,
-		   int rw_flag)
+		   int rw_flag,
+		   bool priv)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
@@ -1818,6 +1822,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 				flags, rw_flag,
 				false, /* clear_ctags */
 				false, /* sparse */
+				priv, /* priv */
 				NULL); /* mapping_batch handle */
 	mutex_unlock(&vm->update_gmmu_lock);
 	if (!vaddr) {
@@ -1932,7 +1937,8 @@ int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm,
 	if (err)
 		return err;
 
-	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, gk20a_mem_flag_none);
+	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
+				     gk20a_mem_flag_none, false);
 	if (!mem->gpu_va) {
 		err = -ENOMEM;
 		goto fail_free;
@@ -2126,7 +2132,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unammped_pte,
-			   int rw_flag, bool sparse, u32 flags)
+			   int rw_flag, bool sparse, bool priv)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	bool small_valid, big_valid;
@@ -2176,7 +2182,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, u32 flags)
+			   int rw_flag, bool sparse, bool priv)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
@@ -2193,6 +2199,9 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			gmmu_pte_address_sys_f(*iova
 				>> gmmu_pte_address_shift_v());
 
+		if (priv)
+			pte_w[0] |= gmmu_pte_privilege_true_f();
+
 		pte_w[1] = gmmu_pte_aperture_video_memory_f() |
 			gmmu_pte_kind_f(kind_v) |
 			gmmu_pte_comptagline_f(*ctag / ctag_granularity);
@@ -2270,7 +2279,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 				   int rw_flag,
 				   bool sparse,
 				   int lvl,
-				   u32 flags)
+				   bool priv)
 {
 	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
 	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
@@ -2318,7 +2327,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
 				sgl, offset, iova,
 				kind_v, ctag, cacheable, unmapped_pte,
-				rw_flag, sparse, flags);
+				rw_flag, sparse, priv);
 		if (err)
 			return err;
@@ -2339,7 +2348,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 					gpu_va, next,
 					kind_v, ctag, cacheable, unmapped_pte,
-					rw_flag, sparse, lvl+1, flags);
+					rw_flag, sparse, lvl+1, priv);
 			unmap_gmmu_pages(next_pte);
 
 			if (err)
@@ -2364,7 +2373,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   bool cacheable, bool unmapped_pte,
 				   int rw_flag,
 				   bool sparse,
-				   u32 flags)
+				   bool priv)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	int ctag_granularity = g->ops.fb.compression_page_size(g);
@@ -2377,7 +2386,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	gk20a_dbg(gpu_dbg_pte,
 		  "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
 		  pgsz_idx,
-		  sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags)
+		  sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0)
 		      : 0ULL,
 		  buffer_offset,
 		  sgt ? sgt->nents : 0);
 
@@ -2386,7 +2395,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		return -EINVAL;
 
 	if (sgt) {
-		iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags);
+		iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0);
 		if (!vm->mm->bypass_smmu && iova) {
 			iova += space_to_skip;
 		} else {
@@ -2422,7 +2431,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			&iova,
 			gpu_va, gpu_end,
 			kind_v, &ctag,
-			cacheable, unmapped_pte, rw_flag, sparse, 0, flags);
+			cacheable, unmapped_pte, rw_flag, sparse, 0, priv);
 	unmap_gmmu_pages(&vm->pdb);
 
 	smp_mb();
@@ -2835,6 +2844,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 					 gk20a_mem_flag_none,
 					 false,
 					 true,
+					 false,
 					 NULL);
 	if (!map_offset) {
 		mutex_unlock(&vm->update_gmmu_lock);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index ee99c821..3af35b26 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -221,7 +221,7 @@ struct gk20a_mmu_level {
 			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, u32 flags);
+			   int rw_flag, bool sparse, bool priv);
 	size_t entry_size;
 };
 
@@ -441,7 +441,8 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		   struct sg_table **sgt,
 		   u64 size,
 		   u32 flags,
-		   int rw_flag);
+		   int rw_flag,
+		   bool priv);
 
 int gk20a_gmmu_alloc_map(struct vm_gk20a *vm,
 		size_t size,
@@ -498,6 +499,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			int rw_flag,
 			bool clear_ctags,
 			bool sparse,
+			bool priv,
 			struct vm_gk20a_mapping_batch *batch);
 
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 053550f6..cf855463 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -111,7 +111,8 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
 	map->vm = vm;
 	map->rw_flag = rw_flag;
 	map->gpu_va = gk20a_gmmu_map(vm, &p->sgt, p->size,
-				     0/*uncached*/, rw_flag);
+				     0/*uncached*/, rw_flag,
+				     false);
 	if (!map->gpu_va) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index a238c523..3a19d6b6 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -333,7 +333,7 @@ int prepare_ucode_blob(struct gk20a *g)
 	sg_dma_address(sgt->sgl) = 0;
 
 	g->pmu.wpr_buf.gpu_va = gk20a_gmmu_map(vm, &sgt, wprsize,
-					       0, gk20a_mem_flag_none);
+					       0, gk20a_mem_flag_none, false);
 	gm20b_dbg_pmu("wpr mapped gpu va :%llx\n", g->pmu.wpr_buf.gpu_va);
 
 	/* Discover all managed falcons*/
@@ -1323,7 +1323,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
 	acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm, &acr->hsbl_ucode.sgt,
 					bl_sz,
 					0, /* flags */
-					gk20a_mem_flag_read_only);
+					gk20a_mem_flag_read_only, false);
 	if (!acr->hsbl_ucode.gpu_va) {
 		gk20a_err(d, "failed to map pmu ucode memory!!");
 		goto err_free_ucode;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h
index 9a1c0e71..32b65912 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gmmu_gm20b.h
@@ -146,6 +146,18 @@ static inline u32 gmmu_pte_valid_false_f(void)
 {
 	return 0x0;
 }
+static inline u32 gmmu_pte_privilege_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_pte_privilege_true_f(void)
+{
+	return 0x2;
+}
+static inline u32 gmmu_pte_privilege_false_f(void)
+{
+	return 0x0;
+}
 static inline u32 gmmu_pte_address_sys_f(u32 v)
 {
 	return (v & 0xfffffff) << 4;
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index be1fa47d..640111b5 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -67,6 +67,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 			int rw_flag,
 			bool clear_ctags,
 			bool sparse,
+			bool priv,
 			struct vm_gk20a_mapping_batch *batch)
 {
 	int err = 0;
-- 
cgit v1.2.2
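
What the patch does, mechanically: a "bool priv" parameter is threaded from the
public mapping entry points (gk20a_gmmu_map(), gk20a_locked_gmmu_map(), the
gpu_ops.mm.gmmu_map hook) down to update_gmmu_pte_locked(), which ORs
gmmu_pte_privilege_true_f() into word 0 of the PTE. The following is a minimal
standalone sketch of that word-0 encoding, built from the privilege and address
helpers added above; gmmu_pte_valid_true_f() returning 0x1 and an address shift
of 12 are assumptions made for illustration, not values shown in this patch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Privilege and address helpers as added in hw_gmmu_gk20a.h above. */
static inline uint32_t gmmu_pte_privilege_true_f(void)  { return 0x2; }
static inline uint32_t gmmu_pte_privilege_false_f(void) { return 0x0; }
static inline uint32_t gmmu_pte_address_sys_f(uint32_t v)
{
	return (v & 0xfffffff) << 4;
}

/* Assumptions for illustration only (not shown in this patch): the
 * valid bit is bit 0 of word 0, and PTE addresses are in 4K units. */
static inline uint32_t gmmu_pte_valid_true_f(void)    { return 0x1; }
static inline uint32_t gmmu_pte_address_shift_v(void) { return 12; }

/* Sketch of the pte_w[0] assembly in update_gmmu_pte_locked(): a valid
 * sysmem PTE whose privilege bit follows the new priv argument. */
static uint32_t pte_word0(uint64_t iova, bool priv)
{
	uint32_t w = gmmu_pte_valid_true_f() |
		gmmu_pte_address_sys_f((uint32_t)(iova >> gmmu_pte_address_shift_v()));

	w |= priv ? gmmu_pte_privilege_true_f() : gmmu_pte_privilege_false_f();
	return w;
}

int main(void)
{
	/* 0x40000000 is an arbitrary example IOVA. */
	printf("priv pte_w[0] = 0x%08x\n", pte_word0(0x40000000ULL, true));
	printf("user pte_w[0] = 0x%08x\n", pte_word0(0x40000000ULL, false));
	return 0;
}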
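
At call sites the change is uniform: every mapping call gains a trailing bool,
with kernel-owned buffers (circular, pagepool, golden context, priv access map,
GR context) passing true and buffers that user-level work must reach (attribute
buffer, CDE backing store, ucode surfaces, semaphore pool) passing false; the
MMU is then expected to fault non-privileged accesses to privileged mappings.
A hypothetical call site in the updated style; mem, vm, d and the error path
are placeholders for illustration, not code from this patch:

	/* Kernel-owned buffer: request a privileged PTE so that only
	 * privileged GPU accesses can reach the mapping. */
	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, mem->size,
				     0, /* flags */
				     gk20a_mem_flag_none,
				     true); /* priv */
	if (!mem->gpu_va) {
		gk20a_err(d, "failed to map kernel buffer");
		return -ENOMEM;
	}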