author		Terje Bergstrom <tbergstrom@nvidia.com>	2015-02-09 23:37:02 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-04-04 21:06:37 -0400
commit		a3b26f25a226ea56e84dbda1bb510c1a7bc11054 (patch)
tree		af422d027e1e4c6d058380fd26dee4eb95c8afa8 /drivers/gpu
parent		80d26baf051fa5fc05e4e15fe5fd6a7bac448a1c (diff)
gpu: nvgpu: TLB invalidate after map/unmap
Always invalidate TLB after mapping or unmapping, and remove the
delayed TLB invalidate.

Change-Id: I6df3c5c1fcca59f0f9e3f911168cb2f913c42815
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/696413
Reviewed-by: Automatic_Commit_Validation_User
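For context, a minimal standalone C sketch of the policy change this patch makes (illustrative only: the toy struct vm, hw_tlb_invalidate(), and map_*() helpers below are hypothetical stand-ins, not the driver's real vm_gk20a interfaces):

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in type for illustration; not the driver's real vm_gk20a. */
struct vm {
	bool tlb_dirty;
};

static void hw_tlb_invalidate(struct vm *vm)
{
	vm->tlb_dirty = false;
	printf("TLB invalidated\n");
}

/* Old scheme (removed by this patch): map/unmap only set a dirty
 * flag, and the invalidate was deferred until the next channel
 * submit, which had to check and clear the flag under locks. */
static void map_deferred(struct vm *vm)
{
	/* ... update GMMU page tables ... */
	vm->tlb_dirty = true;
}

/* New scheme: invalidate the TLB immediately after updating the
 * page tables, so no dirty-flag bookkeeping is needed. */
static void map_immediate(struct vm *vm)
{
	/* ... update GMMU page tables ... */
	hw_tlb_invalidate(vm);
}

int main(void)
{
	struct vm vm = { .tlb_dirty = false };

	map_deferred(&vm);	/* old: TLB stale until a later submit */
	map_immediate(&vm);	/* new: TLB coherent on return */
	return 0;
}

Dropping the dirty flag also removes the locking around it, which is why gk20a_mm_tlb_invalidate() and vgpu_mm_tlb_invalidate() in the diff below no longer take update_gmmu_lock.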
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	 7
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	26
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	 1
-rw-r--r--	drivers/gpu/nvgpu/gm20b/mm_gm20b.c	 6
-rw-r--r--	drivers/gpu/nvgpu/vgpu/mm_vgpu.c	16
5 files changed, 11 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index d61656fc..a32496a2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1589,13 +1589,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
 		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
 
-	/* Invalidate tlb if it's dirty... */
-	/* TBD: this should be done in the cmd stream, not with PRIs. */
-	/* We don't know what context is currently running... */
-	/* Note also: there can be more than one context associated with the */
-	/* address space (vm). */
-	g->ops.mm.tlb_invalidate(c->vm);
-
 	/* Make sure we have enough space for gpfifo entries. If not,
 	 * wait for signals from completed submits */
 	if (gp_free_count(c) < num_entries + extra_entries) {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 17cf8d83..3f71fc5e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1161,6 +1161,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		goto fail_validate;
 	}
 
+	g->ops.mm.tlb_invalidate(vm);
+
 	return map_offset;
 fail_validate:
 	if (allocated)
@@ -1202,8 +1204,6 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
 
-	/* detect which if any pdes/ptes can now be released */
-
 	/* flush l2 so any dirty lines are written out *now*.
 	 * also as we could potentially be switching this buffer
 	 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
@@ -1213,6 +1213,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 	 * unmapping (below). */
 
 	gk20a_mm_l2_flush(g, true);
+
+	g->ops.mm.tlb_invalidate(vm);
 }
 
 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
@@ -1502,10 +1504,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	mutex_unlock(&vm->update_gmmu_lock);
 
-	/* Invalidate kernel mappings immediately */
-	if (vm_aspace_id(vm) == -1)
-		gk20a_mm_tlb_invalidate(vm);
-
 	return map_offset;
 
 clean_up:
@@ -1549,9 +1547,6 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		return 0;
 	}
 
-	/* Invalidate kernel mappings immediately */
-	g->ops.mm.tlb_invalidate(vm);
-
 	return vaddr;
 }
 
@@ -1882,8 +1877,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	}
 
 	smp_mb();
-	vm->tlb_dirty = true;
-	gk20a_dbg_fn("set tlb dirty");
 
 	return 0;
 
@@ -1972,8 +1965,6 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	gk20a_mm_l2_invalidate(vm->mm->g);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
-
-	vm->tlb_dirty = true;
 }
 
 /* NOTE! mapped_buffers lock must be held */
@@ -2992,13 +2983,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 	if (!g->power_on)
 		return;
 
-	/* No need to invalidate if tlb is clean */
-	mutex_lock(&vm->update_gmmu_lock);
-	if (!vm->tlb_dirty) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return;
-	}
-
 	mutex_lock(&tlb_lock);
 
 	trace_gk20a_mm_tlb_invalidate(g->dev->name);
@@ -3042,8 +3026,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 
 out:
 	mutex_unlock(&tlb_lock);
-	vm->tlb_dirty = false;
-	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 int gk20a_mm_suspend(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 596661ca..663bd5d3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -290,7 +290,6 @@ struct vm_gk20a {
 
 	bool big_pages;		/* enable large page support */
 	bool enable_ctag;
-	bool tlb_dirty;
 	bool mapped;
 
 	u32 big_page_size;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 1adff5ab..605464d4 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B MMU
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -34,6 +34,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	u32 pte_cur;
 	void *pte_kv_cur;
 	struct page_table_gk20a *pte;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("");
 
@@ -74,8 +75,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
 
 	smp_mb();
-	vm->tlb_dirty = true;
-	gk20a_dbg_fn("set tlb dirty");
+	g->ops.mm.tlb_invalidate(vm);
 
 	return 0;
 fail:
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 6817b107..82d16bd1 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Memory Management
  *
- * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -114,7 +114,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	if (err || msg.ret)
 		goto fail;
 
-	vm->tlb_dirty = true;
+	g->ops.mm.tlb_invalidate(vm);
+
 	return map_offset;
 fail:
 	gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
@@ -154,7 +155,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
 
-	vm->tlb_dirty = true;
+	g->ops.mm.tlb_invalidate(vm);
 }
 
 static void vgpu_vm_remove_support(struct vm_gk20a *vm)
@@ -402,20 +403,11 @@ static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
 
 	gk20a_dbg_fn("");
 
-	/* No need to invalidate if tlb is clean */
-	mutex_lock(&vm->update_gmmu_lock);
-	if (!vm->tlb_dirty) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return;
-	}
-
 	msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
 	msg.handle = platform->virt_handle;
 	p->handle = vm->handle;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
-	vm->tlb_dirty = false;
-	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 void vgpu_init_mm_ops(struct gpu_ops *gops)