From c8a5d3f908977b55f48e94f31e35e0534c4087cd Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Thu, 8 May 2014 14:43:27 +0300 Subject: gpu: nvgpu: Fix TLB invalidate race TLB invalidate can have a race if several contexts use the same address space. One thread starting an invalidate allows another thread to submit before invalidate is completed. Bug 1502332 Change-Id: I074ec493eac3b153c5f23d796a1dee1d8db24855 Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/407578 Reviewed-by: Riham Haidar Tested-by: Riham Haidar --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 7 ------- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 18 +++++++++--------- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 1 - 3 files changed, 9 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 1b7191db..7cfb3c24 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -1575,13 +1575,6 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, incr_cmd->gp_put = c->gpfifo.put; } - /* Invalidate tlb if it's dirty... */ - /* TBD: this should be done in the cmd stream, not with PRIs. */ - /* We don't know what context is currently running... */ - /* Note also: there can be more than one context associated with the */ - /* address space (vm). */ - gk20a_mm_tlb_invalidate(c->vm); - trace_gk20a_channel_submitted_gpfifo(c->g->dev->name, c->hw_chid, num_entries, diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 210fe1b3..1d6cdb53 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -298,7 +298,6 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) } mm->g = g; - mutex_init(&mm->tlb_lock); mutex_init(&mm->l2_op_lock); mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big]; @@ -2892,11 +2891,11 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) { - struct mm_gk20a *mm = vm->mm; struct gk20a *g = gk20a_from_vm(vm); u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12); u32 data; s32 retry = 200; + static DEFINE_MUTEX(tlb_lock); gk20a_dbg_fn(""); @@ -2915,10 +2914,8 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) mutex_unlock(&vm->update_gmmu_lock); return; } - vm->tlb_dirty = false; - mutex_unlock(&vm->update_gmmu_lock); - mutex_lock(&mm->tlb_lock); + mutex_lock(&tlb_lock); do { data = gk20a_readl(g, fb_mmu_ctrl_r()); if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0) @@ -2927,17 +2924,17 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) retry--; } while (retry >= 0 || !tegra_platform_is_silicon()); - if (retry < 0) + if (retry < 0) { gk20a_warn(dev_from_gk20a(g), "wait mmu fifo space too many retries"); + goto out; + } gk20a_writel(g, fb_mmu_invalidate_pdb_r(), fb_mmu_invalidate_pdb_addr_f(addr_lo) | fb_mmu_invalidate_pdb_aperture_vid_mem_f()); - /* this is a sledgehammer, it would seem */ gk20a_writel(g, fb_mmu_invalidate_r(), - fb_mmu_invalidate_all_pdb_true_f() | fb_mmu_invalidate_all_va_true_f() | fb_mmu_invalidate_trigger_true_f()); @@ -2954,7 +2951,10 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) gk20a_warn(dev_from_gk20a(g), "mmu invalidate too many retries"); - mutex_unlock(&mm->tlb_lock); +out: + mutex_unlock(&tlb_lock); + vm->tlb_dirty = false; + mutex_unlock(&vm->update_gmmu_lock); } int gk20a_mm_suspend(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index e5d76f8f..4dfc2b7d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -322,7 +322,6 @@ struct mm_gk20a { struct inst_desc inst_block; } pmu; - struct mutex tlb_lock; struct mutex l2_op_lock; void (*remove_support)(struct mm_gk20a *mm); -- cgit v1.2.2