author     Debarshi Dutta <ddutta@nvidia.com>                   2017-08-18 06:52:29 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-08-22 06:53:51 -0400
commit     81868a187fa3b217368206f17b19309846e8e7fb (patch)
tree       2b59e33b61cc6e206f7781f3b4ab44c5c7b6d721
parent     5f010177de985c901c33c914efe70a8498a5974f (diff)
gpu: nvgpu: Nvgpu abstraction for linux barriers.
Construct wrapper nvgpu_* methods to replace mb, rmb, wmb, smp_mb, smp_rmb,
smp_wmb, read_barrier_depends and smp_read_barrier_depends.

NVGPU-122

Change-Id: I8d24dd70fef5cb0fadaacc15f3ab11531667a0df
Signed-off-by: Debarshi <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541199
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
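Editorial note: the nvgpu_smp_*() wrappers introduced here map one-to-one onto the Linux smp_* primitives (see include/nvgpu/linux/barrier.h below), so each converted call site keeps its existing publish/consume pairing. As a rough, hypothetical sketch of that pattern (the struct and function names below are invented for illustration; only <nvgpu/barrier.h> and the nvgpu_smp_*() calls come from this change, compare nvgpu_bitmap_allocator_init() with nvgpu_bitmap_alloc_inited() in the diff):

#include <linux/types.h>
#include <nvgpu/barrier.h>

/* Hypothetical state protected by an "inited" flag, as in the allocators. */
struct example_state {
        int data;
        bool inited;
};

static void example_publish(struct example_state *s, int data)
{
        s->data = data;
        /* commit the data write before setting the valid flag */
        nvgpu_smp_wmb();
        s->inited = true;
}

static bool example_consume(struct example_state *s, int *data)
{
        bool inited = s->inited;

        /* read the flag before reading the contents it guards */
        nvgpu_smp_rmb();
        if (inited)
                *data = s->data;
        return inited;
}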
-rw-r--r--  drivers/gpu/nvgpu/clk/clk_arb.c | 43
-rw-r--r--  drivers/gpu/nvgpu/common/mm/bitmap_allocator.c | 5
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator.c | 5
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c | 13
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator.c | 5
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu.c | 3
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu_pg.c | 3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 54
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h | 7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 3
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/barrier.h | 40
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h | 33
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 7
19 files changed, 171 insertions(+), 76 deletions(-)
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index b00ecd31..f1de54c6 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -27,6 +27,7 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/kref.h>
 #include <nvgpu/log.h>
+#include <nvgpu/barrier.h>

 #include "gk20a/gk20a.h"
 #include "clk/clk_arb.h"
@@ -386,7 +387,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 goto init_fail;
 do {
 /* Check that first run is completed */
-smp_mb();
+nvgpu_smp_mb();
 wait_event_interruptible(arb->request_wq,
 nvgpu_atomic_read(&arb->req_nr));
 } while (!nvgpu_atomic_read(&arb->req_nr));
@@ -578,7 +579,7 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
 /* make sure that the initialization of the pool is visible
 * before the update */
-smp_wmb();
+nvgpu_smp_wmb();
 session->target = &session->target_pool[0];

 init_llist_head(&session->targets);
@@ -706,7 +707,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)

 table = ACCESS_ONCE(arb->current_vf_table);
 /* make flag visible when all data has resolved in the tables */
-smp_rmb();
+nvgpu_smp_rmb();

 table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
 &arb->vf_table_pool[0];
@@ -980,7 +981,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 }

 /* make table visible when all data has resolved in the tables */
-smp_wmb();
+nvgpu_smp_wmb();
 xchg(&arb->current_vf_table, table);

 exit_vf_table:
@@ -1077,7 +1078,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 &session->target_pool[1] :
 &session->target_pool[0];
 /* Do not reorder pointer */
-smp_rmb();
+nvgpu_smp_rmb();
 head = llist_del_all(&session->targets);
 if (head) {

@@ -1102,7 +1103,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 llist_add(&dev->node, &arb->requests);
 }
 /* Ensure target is updated before ptr sawp */
-smp_wmb();
+nvgpu_smp_wmb();
 xchg(&session->target, target);
 }

@@ -1148,7 +1149,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 if (pstate == VF_POINT_INVALID_PSTATE) {
 arb->status = -EINVAL;
 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 goto exit_arb;
 }

@@ -1175,7 +1176,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 nvgpu_mutex_release(&arb->pstate_lock);

 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 goto exit_arb;
 }
 status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
@@ -1184,7 +1185,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 nvgpu_mutex_release(&arb->pstate_lock);

 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 goto exit_arb;
 }

@@ -1196,7 +1197,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 nvgpu_mutex_release(&arb->pstate_lock);

 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 goto exit_arb;
 }

@@ -1206,7 +1207,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 nvgpu_mutex_release(&arb->pstate_lock);

 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 goto exit_arb;
 }

@@ -1216,7 +1217,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 nvgpu_mutex_release(&arb->pstate_lock);

 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 goto exit_arb;
 }

@@ -1224,7 +1225,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 &arb->actual_pool[1] : &arb->actual_pool[0];

 /* do not reorder this pointer */
-smp_rmb();
+nvgpu_smp_rmb();
 actual->gpc2clk = gpc2clk_target;
 actual->mclk = mclk_target;
 arb->voltuv_actual = voltuv;
@@ -1232,7 +1233,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 arb->status = status;

 /* Make changes visible to other threads */
-smp_wmb();
+nvgpu_smp_wmb();
 xchg(&arb->actual, actual);

 status = nvgpu_lpwr_enable_pg(g, false);
@@ -1241,12 +1242,12 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 nvgpu_mutex_release(&arb->pstate_lock);

 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 goto exit_arb;
 }

 /* status must be visible before atomic inc */
-smp_wmb();
+nvgpu_smp_wmb();
 nvgpu_atomic_inc(&arb->req_nr);

 /* Unlock pstate change for PG */
@@ -1287,7 +1288,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 (curr - debug->switch_avg) * (curr - prev_avg);
 }
 /* commit changes before exchanging debug pointer */
-smp_wmb();
+nvgpu_smp_wmb();
 xchg(&arb->debug, debug);
 #endif

@@ -1687,7 +1688,7 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
 do {
 target = ACCESS_ONCE(session->target);
 /* no reordering of this pointer */
-smp_rmb();
+nvgpu_smp_rmb();

 switch (api_domain) {
 case NVGPU_GPU_CLK_DOMAIN_MCLK:
@@ -1716,7 +1717,7 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
 do {
 actual = ACCESS_ONCE(arb->actual);
 /* no reordering of this pointer */
-smp_rmb();
+nvgpu_smp_rmb();

 switch (api_domain) {
 case NVGPU_GPU_CLK_DOMAIN_MCLK:
@@ -1854,7 +1855,7 @@ static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,

 table = ACCESS_ONCE(arb->current_vf_table);
 /* pointer to table can be updated by callback */
-smp_rmb();
+nvgpu_smp_rmb();

 if (!table)
 continue;
@@ -2039,7 +2040,7 @@ static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)

 debug = ACCESS_ONCE(arb->debug);
 /* Make copy of structure and ensure no reordering */
-smp_rmb();
+nvgpu_smp_rmb();
 if (!debug)
 return -EINVAL;

diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index eae0475a..274e9c93 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -18,6 +18,7 @@
 #include <nvgpu/allocator.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/barrier.h>

 #include "bitmap_allocator_priv.h"

@@ -40,7 +41,7 @@ static int nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a)
 struct nvgpu_bitmap_allocator *ba = a->priv;
 int inited = ba->inited;

-rmb();
+nvgpu_smp_rmb();
 return inited;
 }

@@ -408,7 +409,7 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 goto fail;
 }

-wmb();
+nvgpu_smp_wmb();
 a->inited = true;

 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 0ef94c10..3e305bb8 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -18,6 +18,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/log2.h>
+#include <nvgpu/barrier.h>

 #include "gk20a/mm_gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -1064,7 +1065,7 @@ static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a)
 struct nvgpu_buddy_allocator *ba = a->priv;
 int inited = ba->initialized;

-rmb();
+nvgpu_smp_rmb();
 return inited;
 }

@@ -1289,7 +1290,7 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 if (err)
 goto fail;

-wmb();
+nvgpu_smp_wmb();
 a->initialized = 1;

 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 73dff2c3..7f486d68 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -21,6 +21,7 @@
 #include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/page_allocator.h>
+#include <nvgpu/barrier.h>

 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -164,8 +165,8 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 return err;

 /*
- * One mb() is done after all mapping operations. Don't need individual
- * barriers for each PD write.
+ * One nvgpu_smp_mb() is done after all mapping operations. Don't need
+ * individual barriers for each PD write.
 */
 vm->pdb.mem->skip_wmb = true;

@@ -259,8 +260,8 @@ static int pd_allocate(struct vm_gk20a *vm,
 }

 /*
- * One mb() is done after all mapping operations. Don't need individual
- * barriers for each PD write.
+ * One nvgpu_smp_mb() is done after all mapping operations. Don't need
+ * individual barriers for each PD write.
 */
 pd->mem->skip_wmb = true;

@@ -714,7 +715,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 attrs);

 unmap_gmmu_pages(g, &vm->pdb);
-mb();
+nvgpu_smp_mb();

 __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");

@@ -983,7 +984,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
 * There probably also needs to be a TLB invalidate as well but we leave
 * that to the caller of this function.
 */
-wmb();
+nvgpu_smp_wmb();

 return 0;
 }
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
index eeb86095..8f712a14 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -17,6 +17,7 @@
 #include <nvgpu/atomic.h>
 #include <nvgpu/allocator.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/barrier.h>

 #include "lockless_allocator_priv.h"

@@ -39,7 +40,7 @@ static int nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a)
 struct nvgpu_lockless_allocator *pa = a->priv;
 int inited = pa->inited;

-rmb();
+nvgpu_smp_rmb();
 return inited;
 }

@@ -198,7 +199,7 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 a->flags = flags;
 nvgpu_atomic_set(&a->nr_allocs, 0);

-wmb();
+nvgpu_smp_wmb();
 a->inited = true;

 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c
index 58108722..63597d10 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu.c
@@ -16,6 +16,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/barrier.h>

 #include "gk20a/gk20a.h"

@@ -394,7 +395,7 @@ void nvgpu_pmu_state_change(struct gk20a *g, u32 pmu_state,
 }

 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 }

 static int nvgpu_pg_init_task(void *arg)
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c
index 935ae95a..b435f4a7 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c
@@ -14,6 +14,7 @@
 #include <nvgpu/pmu.h>
 #include <nvgpu/log.h>
 #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
+#include <nvgpu/barrier.h>

 #include "gk20a/gk20a.h"

@@ -84,7 +85,7 @@ static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
 true);
 WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED);
 /* make status visible */
-smp_mb();
+nvgpu_smp_mb();
 } else
 nvgpu_pmu_state_change(g, PMU_STATE_ELPG_BOOTED,
 true);
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index f50fec13..3e979ebd 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -27,6 +27,7 @@
 #include <nvgpu/hw/gk20a/hw_top_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
+#include <nvgpu/barrier.h>

 static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
 {
@@ -654,7 +655,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 /* take always the postfence as it is needed for protecting the ce context */
 submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;

-wmb();
+nvgpu_smp_wmb();

 ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
 1, submit_flags, &fence,
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5f81b441..0c1b06e9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -35,6 +35,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/ltc.h>
+#include <nvgpu/barrier.h>

 #include "gk20a.h"
 #include "ctxsw_trace_gk20a.h"
@@ -245,9 +246,9 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)

 /*
 * ensure put is read before any subsequent reads.
- * see corresponding wmb in gk20a_channel_add_job()
+ * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
 */
-rmb();
+nvgpu_smp_rmb();

 while (tmp_get != put) {
 job = &ch->joblist.pre_alloc.jobs[tmp_get];
@@ -618,7 +619,7 @@ unbind:
 /* make sure we catch accesses of unopened channels in case
 * there's non-refcounted channel pointers hanging around */
 ch->g = NULL;
-wmb();
+nvgpu_smp_wmb();

 /* ALWAYS last */
 free_channel(f, ch);
@@ -880,7 +881,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 * gk20a_free_channel() */
 ch->referenceable = true;
 nvgpu_atomic_set(&ch->ref_count, 1);
-wmb();
+nvgpu_smp_wmb();

 return ch;
 }
@@ -993,9 +994,9 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,

 /*
 * commit the previous writes before making the entry valid.
- * see the corresponding rmb() in gk20a_free_priv_cmdbuf().
+ * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf().
 */
-wmb();
+nvgpu_smp_wmb();

 e->valid = true;
 gk20a_dbg_fn("done");
@@ -1025,9 +1026,10 @@ static int channel_gk20a_alloc_job(struct channel_gk20a *c,

 /*
 * ensure all subsequent reads happen after reading get.
- * see corresponding wmb in gk20a_channel_clean_up_jobs()
+ * see corresponding nvgpu_smp_wmb in
+ * gk20a_channel_clean_up_jobs()
 */
-rmb();
+nvgpu_smp_rmb();

 if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
 *job_out = &c->joblist.pre_alloc.jobs[put];
@@ -1137,7 +1139,7 @@ bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
 {
 bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;

-rmb();
+nvgpu_smp_rmb();
 return pre_alloc_enabled;
 }

@@ -1194,9 +1196,10 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,

 /*
 * commit the previous writes before setting the flag.
- * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+ * see corresponding nvgpu_smp_rmb in
+ * channel_gk20a_is_prealloc_enabled()
 */
-wmb();
+nvgpu_smp_wmb();
 c->joblist.pre_alloc.enabled = true;

 return 0;
@@ -1218,9 +1221,10 @@ static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)

 /*
 * commit the previous writes before disabling the flag.
- * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+ * see corresponding nvgpu_smp_rmb in
+ * channel_gk20a_is_prealloc_enabled()
 */
-wmb();
+nvgpu_smp_wmb();
 c->joblist.pre_alloc.enabled = false;
 }

@@ -1741,8 +1745,8 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g)
 /*
 * Currently, the only work type is associated with a lock, which deals
 * with any necessary barriers. If a work type with no locking were
- * added, a a wmb() would be needed here. See ..worker_pending() for a
- * pair.
+ * added, a nvgpu_smp_wmb() would be needed here. See
+ * ..worker_pending() for a pair.
 */

 put = nvgpu_atomic_inc_return(&g->channel_worker.put);
@@ -1764,8 +1768,9 @@ static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
 bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get;

 /*
- * This would be the place for a rmb() pairing a wmb() for a wakeup
- * if we had any work with no implicit barriers caused by locking.
+ * This would be the place for a nvgpu_smp_rmb() pairing
+ * a nvgpu_smp_wmb() for a wakeup if we had any work with
+ * no implicit barriers caused by locking.
 */

 return pending;
@@ -1939,7 +1944,7 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)

 if (e->valid) {
 /* read the entry's valid flag before reading its contents */
-rmb();
+nvgpu_smp_rmb();
 if ((q->get != e->off) && e->off != 0)
 nvgpu_err(g, "requests out-of-order, ch=%d",
 c->chid);
@@ -1984,10 +1989,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,

 /*
 * ensure all pending write complete before adding to the list.
- * see corresponding rmb in gk20a_channel_clean_up_jobs() &
+ * see corresponding nvgpu_smp_rmb in
+ * gk20a_channel_clean_up_jobs() &
 * gk20a_channel_abort_clean_up()
 */
-wmb();
+nvgpu_smp_wmb();
 channel_gk20a_joblist_add(c, job);

 if (!pre_alloc_enabled)
@@ -2061,10 +2067,10 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,

 /*
 * ensure that all subsequent reads occur after checking
- * that we have a valid node. see corresponding wmb in
+ * that we have a valid node. see corresponding nvgpu_smp_wmb in
 * gk20a_channel_add_job().
 */
-rmb();
+nvgpu_smp_rmb();
 job = channel_gk20a_joblist_peek(c);
 channel_gk20a_joblist_unlock(c);

@@ -2127,9 +2133,9 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,

 /*
 * ensure all pending writes complete before freeing up the job.
- * see corresponding rmb in channel_gk20a_alloc_job().
+ * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job().
 */
-wmb();
+nvgpu_smp_wmb();

 channel_gk20a_free_job(c, job);
 job_finished = 1;
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 546917f1..91c3b206 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -29,6 +29,7 @@

 #include <nvgpu/log.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>

 #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
@@ -635,7 +636,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g,
 dev->ents[write_idx] = *entry;

 /* ensure record is written before updating write index */
-smp_wmb();
+nvgpu_smp_wmb();

 write_idx++;
 if (unlikely(write_idx >= hdr->num_ents))
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 5ee90440..fea3b0fa 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -22,6 +22,7 @@
 #include <nvgpu/hashtable.h>
 #include <nvgpu/circ_buf.h>
 #include <nvgpu/thread.h>
+#include <nvgpu/barrier.h>

 #include "ctxsw_trace_gk20a.h"
 #include "fecs_trace_gk20a.h"
@@ -370,7 +371,7 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
 }

 /* ensure FECS records has been updated before incrementing read index */
-wmb();
+nvgpu_smp_wmb();
 gk20a_fecs_trace_set_read_index(g, read);

 done:
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 8ad24c44..5fa9a0df 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -21,6 +21,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/barrier.h>

 #include "gk20a.h"
 #include "channel_gk20a.h"
@@ -73,7 +74,7 @@ static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
 {
 bool valid = f->valid;

-rmb();
+nvgpu_smp_rmb();
 return valid;
 }

@@ -252,7 +253,7 @@ int gk20a_fence_from_semaphore(
 f->semaphore_wq = semaphore_wq;

 /* commit previous writes before setting the valid flag */
-wmb();
+nvgpu_smp_wmb();
 f->valid = true;

 return 0;
@@ -327,7 +328,7 @@ int gk20a_fence_from_syncpt(
 f->syncpt_value = value;

 /* commit previous writes before setting the valid flag */
-wmb();
+nvgpu_smp_wmb();
 f->valid = true;

 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 47e7d82e..fd249bc9 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -30,6 +30,7 @@
 #include <nvgpu/log2.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/barrier.h>

 #include "gk20a.h"
 #include "ctxsw_trace_gk20a.h"
@@ -966,7 +967,7 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g)
 v = gk20a_bar1_readl(g, bar1_vaddr);

 *cpu_vaddr = v1;
-smp_mb();
+nvgpu_smp_mb();

 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
 nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \
@@ -1309,7 +1310,7 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
 if (refch) {
 /* mark channel as faulted */
 refch->has_timedout = true;
-wmb();
+nvgpu_smp_wmb();
 /* unblock pending waits */
 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 19ea76cb..ab2d0b7f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -49,6 +49,7 @@ struct nvgpu_cpu_time_correlation_sample;
 #include <nvgpu/falcon.h>
 #include <nvgpu/pmu.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>

 #include "clk_gk20a.h"
 #include "ce2_gk20a.h"
@@ -1324,7 +1325,7 @@ static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v)
 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
 } else {
 writel_relaxed(v, g->regs + r);
-wmb();
+nvgpu_smp_wmb();
 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
 }
 }
@@ -1351,7 +1352,7 @@ static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v)
 __gk20a_warn_on_no_regs();
 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
 } else {
-wmb();
+nvgpu_smp_wmb();
 do {
 writel_relaxed(v, g->regs + r);
 } while (readl(g->regs + r) != v);
@@ -1365,7 +1366,7 @@ static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v)
 __gk20a_warn_on_no_regs();
 gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
 } else {
-wmb();
+nvgpu_smp_wmb();
 writel_relaxed(v, g->bar1 + b);
 gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cd1d31a5..27442947 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -31,6 +31,7 @@
 #include <nvgpu/firmware.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
+#include <nvgpu/barrier.h>

 #include "gk20a.h"
 #include "kind_gk20a.h"
@@ -554,8 +555,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
 gk20a_writel(g, gr_fecs_method_push_r(),
 gr_fecs_method_push_adr_f(op.method.addr));

- /* op.mb.id == 4 cases require waiting for completion on
- * for op.mb.id == 0 */
+ /* op.mailbox.id == 4 cases require waiting for completion on
+ * for op.mailbox.id == 0 */
 if (op.mailbox.id == 4)
 op.mailbox.id = 0;

diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index e688c863..8e913f23 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -21,6 +21,7 @@
 #include <nvgpu/timers.h>
 #include <nvgpu/log.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>

 #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -50,7 +51,7 @@ void channel_gm20b_bind(struct channel_gk20a *c)
 (gk20a_readl(g, ccsr_channel_r(c->chid)) &
 ~ccsr_channel_enable_set_f(~0)) |
 ccsr_channel_enable_set_true_f());
-wmb();
+nvgpu_smp_wmb();
 nvgpu_atomic_set(&c->bound, true);
 }

diff --git a/drivers/gpu/nvgpu/include/nvgpu/barrier.h b/drivers/gpu/nvgpu/include/nvgpu/barrier.h
new file mode 100644
index 00000000..26eec3ed
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/barrier.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This file contains NVGPU_* high-level abstractions for various
+ * memor-barrier operations available in linux/kernel. Every OS
+ * should provide their own OS specific calls under this common API
+ */
+
+#ifndef __NVGPU_BARRIER_H__
+#define __NVGPU_BARRIER_H__
+
+#ifdef __KERNEL__
+#include <nvgpu/linux/barrier.h>
+#endif
+
+#define nvgpu_mb() __nvgpu_mb()
+#define nvgpu_rmb() __nvgpu_rmb()
+#define nvgpu_wmb() __nvgpu_wmb()
+
+#define nvgpu_smp_mb() __nvgpu_smp_mb()
+#define nvgpu_smp_rmb() __nvgpu_smp_rmb()
+#define nvgpu_smp_wmb() __nvgpu_smp_wmb()
+
+#define nvgpu_read_barrier_depends() __nvgpu_read_barrier_depends()
+#define nvgpu_smp_read_barrier_depends() __nvgpu_smp_read_barrier_depends()
+
+#endif /* __NVGPU_BARRIER_H__ */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h b/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h
new file mode 100644
index 00000000..e7b83ee8
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __NVGPU_BARRIER_LINUX_H__
+#define __NVGPU_BARRIER_LINUX_H__
+
+#include <asm/barrier.h>
+
+#define __nvgpu_mb() mb()
+#define __nvgpu_rmb() rmb()
+#define __nvgpu_wmb() wmb()
+
+#define __nvgpu_smp_mb() smp_mb()
+#define __nvgpu_smp_rmb() smp_rmb()
+#define __nvgpu_smp_wmb() smp_wmb()
+
+#define __nvgpu_read_barrier_depends() read_barrier_depends()
+#define __nvgpu_smp_read_barrier_depends() smp_read_barrier_depends()
+
+#endif /* __NVGPU_BARRIER_LINUX_H__ */
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index c8519905..3f03e25a 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -20,6 +20,7 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/atomic.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/barrier.h>

 #include "vgpu/vgpu.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
@@ -42,7 +43,7 @@ static void vgpu_channel_bind(struct channel_gk20a *ch)
 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 WARN_ON(err || msg.ret);

-wmb();
+nvgpu_smp_wmb();
 nvgpu_atomic_set(&ch->bound, true);
 }

@@ -370,7 +371,7 @@ static int vgpu_init_fifo_setup_hw(struct gk20a *g)
 v = gk20a_bar1_readl(g, bar1_vaddr);

 *cpu_vaddr = v1;
-smp_mb();
+nvgpu_smp_mb();

 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
 nvgpu_err(g, "bar1 broken @ gk20a!");
@@ -728,7 +729,7 @@ static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g,

 /* mark channel as faulted */
 ch->has_timedout = true;
-wmb();
+nvgpu_smp_wmb();
 /* unblock pending waits */
 nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
 nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);