diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-05-08 07:43:27 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:09:20 -0400 |
commit | c8a5d3f908977b55f48e94f31e35e0534c4087cd (patch) | |
tree | 7c095ae930537a6974a5ffe344ca80015b980907 /drivers/gpu/nvgpu/gk20a | |
parent | 4ac110cb8a46a0c91a4a1e39c168de1b49688971 (diff) |
gpu: nvgpu: Fix TLB invalidate race
TLB invalidation can race when several contexts share the same
address space: once one thread has started an invalidate, another
thread is able to submit before that invalidate has completed.
Bug 1502332
Change-Id: I074ec493eac3b153c5f23d796a1dee1d8db24855
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/407578
Reviewed-by: Riham Haidar <rhaidar@nvidia.com>
Tested-by: Riham Haidar <rhaidar@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 1 |
3 files changed, 9 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 1b7191db..7cfb3c24 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1575,13 +1575,6 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1575 | incr_cmd->gp_put = c->gpfifo.put; | 1575 | incr_cmd->gp_put = c->gpfifo.put; |
1576 | } | 1576 | } |
1577 | 1577 | ||
1578 | /* Invalidate tlb if it's dirty... */ | ||
1579 | /* TBD: this should be done in the cmd stream, not with PRIs. */ | ||
1580 | /* We don't know what context is currently running... */ | ||
1581 | /* Note also: there can be more than one context associated with the */ | ||
1582 | /* address space (vm). */ | ||
1583 | gk20a_mm_tlb_invalidate(c->vm); | ||
1584 | |||
1585 | trace_gk20a_channel_submitted_gpfifo(c->g->dev->name, | 1578 | trace_gk20a_channel_submitted_gpfifo(c->g->dev->name, |
1586 | c->hw_chid, | 1579 | c->hw_chid, |
1587 | num_entries, | 1580 | num_entries, |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 210fe1b3..1d6cdb53 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -298,7 +298,6 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
298 | } | 298 | } |
299 | 299 | ||
300 | mm->g = g; | 300 | mm->g = g; |
301 | mutex_init(&mm->tlb_lock); | ||
302 | mutex_init(&mm->l2_op_lock); | 301 | mutex_init(&mm->l2_op_lock); |
303 | mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; | 302 | mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; |
304 | mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big]; | 303 | mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big]; |
@@ -2892,11 +2891,11 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, | |||
2892 | 2891 | ||
2893 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | 2892 | void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) |
2894 | { | 2893 | { |
2895 | struct mm_gk20a *mm = vm->mm; | ||
2896 | struct gk20a *g = gk20a_from_vm(vm); | 2894 | struct gk20a *g = gk20a_from_vm(vm); |
2897 | u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12); | 2895 | u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12); |
2898 | u32 data; | 2896 | u32 data; |
2899 | s32 retry = 200; | 2897 | s32 retry = 200; |
2898 | static DEFINE_MUTEX(tlb_lock); | ||
2900 | 2899 | ||
2901 | gk20a_dbg_fn(""); | 2900 | gk20a_dbg_fn(""); |
2902 | 2901 | ||
@@ -2915,10 +2914,8 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | |||
2915 | mutex_unlock(&vm->update_gmmu_lock); | 2914 | mutex_unlock(&vm->update_gmmu_lock); |
2916 | return; | 2915 | return; |
2917 | } | 2916 | } |
2918 | vm->tlb_dirty = false; | ||
2919 | mutex_unlock(&vm->update_gmmu_lock); | ||
2920 | 2917 | ||
2921 | mutex_lock(&mm->tlb_lock); | 2918 | mutex_lock(&tlb_lock); |
2922 | do { | 2919 | do { |
2923 | data = gk20a_readl(g, fb_mmu_ctrl_r()); | 2920 | data = gk20a_readl(g, fb_mmu_ctrl_r()); |
2924 | if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0) | 2921 | if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0) |
@@ -2927,17 +2924,17 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | |||
2927 | retry--; | 2924 | retry--; |
2928 | } while (retry >= 0 || !tegra_platform_is_silicon()); | 2925 | } while (retry >= 0 || !tegra_platform_is_silicon()); |
2929 | 2926 | ||
2930 | if (retry < 0) | 2927 | if (retry < 0) { |
2931 | gk20a_warn(dev_from_gk20a(g), | 2928 | gk20a_warn(dev_from_gk20a(g), |
2932 | "wait mmu fifo space too many retries"); | 2929 | "wait mmu fifo space too many retries"); |
2930 | goto out; | ||
2931 | } | ||
2933 | 2932 | ||
2934 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), | 2933 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), |
2935 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | | 2934 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | |
2936 | fb_mmu_invalidate_pdb_aperture_vid_mem_f()); | 2935 | fb_mmu_invalidate_pdb_aperture_vid_mem_f()); |
2937 | 2936 | ||
2938 | /* this is a sledgehammer, it would seem */ | ||
2939 | gk20a_writel(g, fb_mmu_invalidate_r(), | 2937 | gk20a_writel(g, fb_mmu_invalidate_r(), |
2940 | fb_mmu_invalidate_all_pdb_true_f() | | ||
2941 | fb_mmu_invalidate_all_va_true_f() | | 2938 | fb_mmu_invalidate_all_va_true_f() | |
2942 | fb_mmu_invalidate_trigger_true_f()); | 2939 | fb_mmu_invalidate_trigger_true_f()); |
2943 | 2940 | ||
@@ -2954,7 +2951,10 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | |||
2954 | gk20a_warn(dev_from_gk20a(g), | 2951 | gk20a_warn(dev_from_gk20a(g), |
2955 | "mmu invalidate too many retries"); | 2952 | "mmu invalidate too many retries"); |
2956 | 2953 | ||
2957 | mutex_unlock(&mm->tlb_lock); | 2954 | out: |
2955 | mutex_unlock(&tlb_lock); | ||
2956 | vm->tlb_dirty = false; | ||
2957 | mutex_unlock(&vm->update_gmmu_lock); | ||
2958 | } | 2958 | } |
2959 | 2959 | ||
2960 | int gk20a_mm_suspend(struct gk20a *g) | 2960 | int gk20a_mm_suspend(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index e5d76f8f..4dfc2b7d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -322,7 +322,6 @@ struct mm_gk20a { | |||
322 | struct inst_desc inst_block; | 322 | struct inst_desc inst_block; |
323 | } pmu; | 323 | } pmu; |
324 | 324 | ||
325 | struct mutex tlb_lock; | ||
326 | struct mutex l2_op_lock; | 325 | struct mutex l2_op_lock; |
327 | 326 | ||
328 | void (*remove_support)(struct mm_gk20a *mm); | 327 | void (*remove_support)(struct mm_gk20a *mm); |