diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-02-09 23:37:02 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:06:37 -0400 |
commit | a3b26f25a226ea56e84dbda1bb510c1a7bc11054 (patch) | |
tree | af422d027e1e4c6d058380fd26dee4eb95c8afa8 /drivers/gpu/nvgpu | |
parent | 80d26baf051fa5fc05e4e15fe5fd6a7bac448a1c (diff) |
gpu: nvgpu: TLB invalidate after map/unmap
Always invalidate TLB after mapping or unmapping, and remove the
delayed TLB invalidate.
Change-Id: I6df3c5c1fcca59f0f9e3f911168cb2f913c42815
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/696413
Reviewed-by: Automatic_Commit_Validation_User
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 26 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 16 |
5 files changed, 11 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index d61656fc..a32496a2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1589,13 +1589,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1589 | gk20a_dbg_info("pre-submit put %d, get %d, size %d", | 1589 | gk20a_dbg_info("pre-submit put %d, get %d, size %d", |
1590 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | 1590 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); |
1591 | 1591 | ||
1592 | /* Invalidate tlb if it's dirty... */ | ||
1593 | /* TBD: this should be done in the cmd stream, not with PRIs. */ | ||
1594 | /* We don't know what context is currently running... */ | ||
1595 | /* Note also: there can be more than one context associated with the */ | ||
1596 | /* address space (vm). */ | ||
1597 | g->ops.mm.tlb_invalidate(c->vm); | ||
1598 | |||
1599 | /* Make sure we have enough space for gpfifo entries. If not, | 1592 | /* Make sure we have enough space for gpfifo entries. If not, |
1600 | * wait for signals from completed submits */ | 1593 | * wait for signals from completed submits */ |
1601 | if (gp_free_count(c) < num_entries + extra_entries) { | 1594 | if (gp_free_count(c) < num_entries + extra_entries) { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 17cf8d83..3f71fc5e 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1161,6 +1161,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
1161 | goto fail_validate; | 1161 | goto fail_validate; |
1162 | } | 1162 | } |
1163 | 1163 | ||
1164 | g->ops.mm.tlb_invalidate(vm); | ||
1165 | |||
1164 | return map_offset; | 1166 | return map_offset; |
1165 | fail_validate: | 1167 | fail_validate: |
1166 | if (allocated) | 1168 | if (allocated) |
@@ -1202,8 +1204,6 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
1202 | dev_err(dev_from_vm(vm), | 1204 | dev_err(dev_from_vm(vm), |
1203 | "failed to update gmmu ptes on unmap"); | 1205 | "failed to update gmmu ptes on unmap"); |
1204 | 1206 | ||
1205 | /* detect which if any pdes/ptes can now be released */ | ||
1206 | |||
1207 | /* flush l2 so any dirty lines are written out *now*. | 1207 | /* flush l2 so any dirty lines are written out *now*. |
1208 | * also as we could potentially be switching this buffer | 1208 | * also as we could potentially be switching this buffer |
1209 | * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at | 1209 | * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at |
@@ -1213,6 +1213,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
1213 | * unmapping (below). */ | 1213 | * unmapping (below). */ |
1214 | 1214 | ||
1215 | gk20a_mm_l2_flush(g, true); | 1215 | gk20a_mm_l2_flush(g, true); |
1216 | |||
1217 | g->ops.mm.tlb_invalidate(vm); | ||
1216 | } | 1218 | } |
1217 | 1219 | ||
1218 | static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm, | 1220 | static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm, |
@@ -1502,10 +1504,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1502 | 1504 | ||
1503 | mutex_unlock(&vm->update_gmmu_lock); | 1505 | mutex_unlock(&vm->update_gmmu_lock); |
1504 | 1506 | ||
1505 | /* Invalidate kernel mappings immediately */ | ||
1506 | if (vm_aspace_id(vm) == -1) | ||
1507 | gk20a_mm_tlb_invalidate(vm); | ||
1508 | |||
1509 | return map_offset; | 1507 | return map_offset; |
1510 | 1508 | ||
1511 | clean_up: | 1509 | clean_up: |
@@ -1549,9 +1547,6 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, | |||
1549 | return 0; | 1547 | return 0; |
1550 | } | 1548 | } |
1551 | 1549 | ||
1552 | /* Invalidate kernel mappings immediately */ | ||
1553 | g->ops.mm.tlb_invalidate(vm); | ||
1554 | |||
1555 | return vaddr; | 1550 | return vaddr; |
1556 | } | 1551 | } |
1557 | 1552 | ||
@@ -1882,8 +1877,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
1882 | } | 1877 | } |
1883 | 1878 | ||
1884 | smp_mb(); | 1879 | smp_mb(); |
1885 | vm->tlb_dirty = true; | ||
1886 | gk20a_dbg_fn("set tlb dirty"); | ||
1887 | 1880 | ||
1888 | return 0; | 1881 | return 0; |
1889 | 1882 | ||
@@ -1972,8 +1965,6 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) | |||
1972 | gk20a_mm_l2_invalidate(vm->mm->g); | 1965 | gk20a_mm_l2_invalidate(vm->mm->g); |
1973 | 1966 | ||
1974 | gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]); | 1967 | gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]); |
1975 | |||
1976 | vm->tlb_dirty = true; | ||
1977 | } | 1968 | } |
1978 | 1969 | ||
1979 | /* NOTE! mapped_buffers lock must be held */ | 1970 | /* NOTE! mapped_buffers lock must be held */ |
@@ -2992,13 +2983,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | |||
2992 | if (!g->power_on) | 2983 | if (!g->power_on) |
2993 | return; | 2984 | return; |
2994 | 2985 | ||
2995 | /* No need to invalidate if tlb is clean */ | ||
2996 | mutex_lock(&vm->update_gmmu_lock); | ||
2997 | if (!vm->tlb_dirty) { | ||
2998 | mutex_unlock(&vm->update_gmmu_lock); | ||
2999 | return; | ||
3000 | } | ||
3001 | |||
3002 | mutex_lock(&tlb_lock); | 2986 | mutex_lock(&tlb_lock); |
3003 | 2987 | ||
3004 | trace_gk20a_mm_tlb_invalidate(g->dev->name); | 2988 | trace_gk20a_mm_tlb_invalidate(g->dev->name); |
@@ -3042,8 +3026,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) | |||
3042 | 3026 | ||
3043 | out: | 3027 | out: |
3044 | mutex_unlock(&tlb_lock); | 3028 | mutex_unlock(&tlb_lock); |
3045 | vm->tlb_dirty = false; | ||
3046 | mutex_unlock(&vm->update_gmmu_lock); | ||
3047 | } | 3029 | } |
3048 | 3030 | ||
3049 | int gk20a_mm_suspend(struct gk20a *g) | 3031 | int gk20a_mm_suspend(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 596661ca..663bd5d3 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -290,7 +290,6 @@ struct vm_gk20a { | |||
290 | 290 | ||
291 | bool big_pages; /* enable large page support */ | 291 | bool big_pages; /* enable large page support */ |
292 | bool enable_ctag; | 292 | bool enable_ctag; |
293 | bool tlb_dirty; | ||
294 | bool mapped; | 293 | bool mapped; |
295 | 294 | ||
296 | u32 big_page_size; | 295 | u32 big_page_size; |
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 1adff5ab..605464d4 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GM20B MMU | 2 | * GM20B MMU |
3 | * | 3 | * |
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -34,6 +34,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | |||
34 | u32 pte_cur; | 34 | u32 pte_cur; |
35 | void *pte_kv_cur; | 35 | void *pte_kv_cur; |
36 | struct page_table_gk20a *pte; | 36 | struct page_table_gk20a *pte; |
37 | struct gk20a *g = gk20a_from_vm(vm); | ||
37 | 38 | ||
38 | gk20a_dbg_fn(""); | 39 | gk20a_dbg_fn(""); |
39 | 40 | ||
@@ -74,8 +75,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | |||
74 | unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur); | 75 | unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur); |
75 | 76 | ||
76 | smp_mb(); | 77 | smp_mb(); |
77 | vm->tlb_dirty = true; | 78 | g->ops.mm.tlb_invalidate(vm); |
78 | gk20a_dbg_fn("set tlb dirty"); | ||
79 | 79 | ||
80 | return 0; | 80 | return 0; |
81 | fail: | 81 | fail: |
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 6817b107..82d16bd1 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Virtualized GPU Memory Management | 2 | * Virtualized GPU Memory Management |
3 | * | 3 | * |
4 | * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -114,7 +114,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
114 | if (err || msg.ret) | 114 | if (err || msg.ret) |
115 | goto fail; | 115 | goto fail; |
116 | 116 | ||
117 | vm->tlb_dirty = true; | 117 | g->ops.mm.tlb_invalidate(vm); |
118 | |||
118 | return map_offset; | 119 | return map_offset; |
119 | fail: | 120 | fail: |
120 | gk20a_err(d, "%s: failed with err=%d\n", __func__, err); | 121 | gk20a_err(d, "%s: failed with err=%d\n", __func__, err); |
@@ -154,7 +155,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
154 | dev_err(dev_from_vm(vm), | 155 | dev_err(dev_from_vm(vm), |
155 | "failed to update gmmu ptes on unmap"); | 156 | "failed to update gmmu ptes on unmap"); |
156 | 157 | ||
157 | vm->tlb_dirty = true; | 158 | g->ops.mm.tlb_invalidate(vm); |
158 | } | 159 | } |
159 | 160 | ||
160 | static void vgpu_vm_remove_support(struct vm_gk20a *vm) | 161 | static void vgpu_vm_remove_support(struct vm_gk20a *vm) |
@@ -402,20 +403,11 @@ static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm) | |||
402 | 403 | ||
403 | gk20a_dbg_fn(""); | 404 | gk20a_dbg_fn(""); |
404 | 405 | ||
405 | /* No need to invalidate if tlb is clean */ | ||
406 | mutex_lock(&vm->update_gmmu_lock); | ||
407 | if (!vm->tlb_dirty) { | ||
408 | mutex_unlock(&vm->update_gmmu_lock); | ||
409 | return; | ||
410 | } | ||
411 | |||
412 | msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE; | 406 | msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE; |
413 | msg.handle = platform->virt_handle; | 407 | msg.handle = platform->virt_handle; |
414 | p->handle = vm->handle; | 408 | p->handle = vm->handle; |
415 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 409 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
416 | WARN_ON(err || msg.ret); | 410 | WARN_ON(err || msg.ret); |
417 | vm->tlb_dirty = false; | ||
418 | mutex_unlock(&vm->update_gmmu_lock); | ||
419 | } | 411 | } |
420 | 412 | ||
421 | void vgpu_init_mm_ops(struct gpu_ops *gops) | 413 | void vgpu_init_mm_ops(struct gpu_ops *gops) |