author		Terje Bergstrom <tbergstrom@nvidia.com>	2015-02-09 23:37:02 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-04-04 21:06:37 -0400
commit		a3b26f25a226ea56e84dbda1bb510c1a7bc11054 (patch)
tree		af422d027e1e4c6d058380fd26dee4eb95c8afa8 /drivers/gpu
parent		80d26baf051fa5fc05e4e15fe5fd6a7bac448a1c (diff)
gpu: nvgpu: TLB invalidate after map/unmap
Always invalidate TLB after mapping or unmapping, and remove the
delayed TLB invalidate.

Change-Id: I6df3c5c1fcca59f0f9e3f911168cb2f913c42815
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/696413
Reviewed-by: Automatic_Commit_Validation_User
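For context, a minimal standalone C sketch of the policy change this patch makes (illustrative only: the toy struct vm, hw_tlb_invalidate(), and map_*() helpers below are hypothetical stand-ins, not the driver's real vm_gk20a interfaces):

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in type for illustration; not the driver's real vm_gk20a. */
struct vm {
	bool tlb_dirty;
};

static void hw_tlb_invalidate(struct vm *vm)
{
	vm->tlb_dirty = false;
	printf("TLB invalidated\n");
}

/* Old scheme (removed by this patch): map/unmap only set a dirty
 * flag, and the invalidate was deferred until the next channel
 * submit, which had to check and clear the flag under locks. */
static void map_deferred(struct vm *vm)
{
	/* ... update GMMU page tables ... */
	vm->tlb_dirty = true;
}

/* New scheme: invalidate the TLB immediately after updating the
 * page tables, so no dirty-flag bookkeeping is needed. */
static void map_immediate(struct vm *vm)
{
	/* ... update GMMU page tables ... */
	hw_tlb_invalidate(vm);
}

int main(void)
{
	struct vm vm = { .tlb_dirty = false };

	map_deferred(&vm);	/* old: TLB stale until a later submit */
	map_immediate(&vm);	/* new: TLB coherent on return */
	return 0;
}

Dropping the dirty flag also removes the locking around it, which is why gk20a_mm_tlb_invalidate() and vgpu_mm_tlb_invalidate() in the diff below no longer take update_gmmu_lock.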
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	 7
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	26
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	 1
-rw-r--r--	drivers/gpu/nvgpu/gm20b/mm_gm20b.c	 6
-rw-r--r--	drivers/gpu/nvgpu/vgpu/mm_vgpu.c	16
5 files changed, 11 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index d61656fc..a32496a2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1589,13 +1589,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
 		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
 
-	/* Invalidate tlb if it's dirty... */
-	/* TBD: this should be done in the cmd stream, not with PRIs. */
-	/* We don't know what context is currently running... */
-	/* Note also: there can be more than one context associated with the */
-	/* address space (vm). */
-	g->ops.mm.tlb_invalidate(c->vm);
-
 	/* Make sure we have enough space for gpfifo entries. If not,
 	 * wait for signals from completed submits */
 	if (gp_free_count(c) < num_entries + extra_entries) {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 17cf8d83..3f71fc5e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1161,6 +1161,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		goto fail_validate;
 	}
 
+	g->ops.mm.tlb_invalidate(vm);
+
 	return map_offset;
 fail_validate:
 	if (allocated)
@@ -1202,8 +1204,6 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
 
-	/* detect which if any pdes/ptes can now be released */
-
 	/* flush l2 so any dirty lines are written out *now*.
 	 * also as we could potentially be switching this buffer
 	 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
@@ -1213,6 +1213,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 	 * unmapping (below). */
 
 	gk20a_mm_l2_flush(g, true);
+
+	g->ops.mm.tlb_invalidate(vm);
 }
 
 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
@@ -1502,10 +1504,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	mutex_unlock(&vm->update_gmmu_lock);
 
-	/* Invalidate kernel mappings immediately */
-	if (vm_aspace_id(vm) == -1)
-		gk20a_mm_tlb_invalidate(vm);
-
 	return map_offset;
 
 clean_up:
@@ -1549,9 +1547,6 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		return 0;
 	}
 
-	/* Invalidate kernel mappings immediately */
-	g->ops.mm.tlb_invalidate(vm);
-
 	return vaddr;
 }
 
@@ -1882,8 +1877,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	}
 
 	smp_mb();
-	vm->tlb_dirty = true;
-	gk20a_dbg_fn("set tlb dirty");
 
 	return 0;
 
@@ -1972,8 +1965,6 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	gk20a_mm_l2_invalidate(vm->mm->g);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
-
-	vm->tlb_dirty = true;
 }
 
 /* NOTE! mapped_buffers lock must be held */
@@ -2992,13 +2983,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 	if (!g->power_on)
 		return;
 
-	/* No need to invalidate if tlb is clean */
-	mutex_lock(&vm->update_gmmu_lock);
-	if (!vm->tlb_dirty) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return;
-	}
-
 	mutex_lock(&tlb_lock);
 
 	trace_gk20a_mm_tlb_invalidate(g->dev->name);
@@ -3042,8 +3026,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 
 out:
 	mutex_unlock(&tlb_lock);
-	vm->tlb_dirty = false;
-	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 int gk20a_mm_suspend(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 596661ca..663bd5d3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -290,7 +290,6 @@ struct vm_gk20a {
 
 	bool big_pages;		/* enable large page support */
 	bool enable_ctag;
-	bool tlb_dirty;
 	bool mapped;
 
 	u32 big_page_size;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 1adff5ab..605464d4 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B MMU
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -34,6 +34,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	u32 pte_cur;
 	void *pte_kv_cur;
 	struct page_table_gk20a *pte;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("");
 
@@ -74,8 +75,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
 
 	smp_mb();
-	vm->tlb_dirty = true;
-	gk20a_dbg_fn("set tlb dirty");
+	g->ops.mm.tlb_invalidate(vm);
 
 	return 0;
 fail:
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 6817b107..82d16bd1 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Memory Management
  *
- * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -114,7 +114,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	if (err || msg.ret)
 		goto fail;
 
-	vm->tlb_dirty = true;
+	g->ops.mm.tlb_invalidate(vm);
+
 	return map_offset;
 fail:
 	gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
@@ -154,7 +155,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
 
-	vm->tlb_dirty = true;
+	g->ops.mm.tlb_invalidate(vm);
 }
 
 static void vgpu_vm_remove_support(struct vm_gk20a *vm)
@@ -402,20 +403,11 @@ static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
 
 	gk20a_dbg_fn("");
 
-	/* No need to invalidate if tlb is clean */
-	mutex_lock(&vm->update_gmmu_lock);
-	if (!vm->tlb_dirty) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return;
-	}
-
 	msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
 	msg.handle = platform->virt_handle;
 	p->handle = vm->handle;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
-	vm->tlb_dirty = false;
-	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 void vgpu_init_mm_ops(struct gpu_ops *gops)