From 81868a187fa3b217368206f17b19309846e8e7fb Mon Sep 17 00:00:00 2001
From: Debarshi Dutta <ddutta@nvidia.com>
Date: Fri, 18 Aug 2017 16:22:29 +0530
Subject: gpu: nvgpu: Nvgpu abstraction for linux barriers.

Construct wrapper nvgpu_* macros to replace mb, rmb, wmb, smp_mb,
smp_rmb, smp_wmb, read_barrier_depends and
smp_read_barrier_depends.

NVGPU-122

Change-Id: I8d24dd70fef5cb0fadaacc15f3ab11531667a0df
Signed-off-by: Debarshi <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541199
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_arb.c                  | 43 ++++++++++---------
 drivers/gpu/nvgpu/common/mm/bitmap_allocator.c   |  5 ++-
 drivers/gpu/nvgpu/common/mm/buddy_allocator.c    |  5 ++-
 drivers/gpu/nvgpu/common/mm/gmmu.c               | 13 +++---
 drivers/gpu/nvgpu/common/mm/lockless_allocator.c |  5 ++-
 drivers/gpu/nvgpu/common/pmu/pmu.c               |  3 +-
 drivers/gpu/nvgpu/common/pmu/pmu_pg.c            |  3 +-
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c              |  3 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c          | 54 +++++++++++++-----------
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c      |  3 +-
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c       |  3 +-
 drivers/gpu/nvgpu/gk20a/fence_gk20a.c            |  7 +--
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c             |  5 ++-
 drivers/gpu/nvgpu/gk20a/gk20a.h                  |  7 +--
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c               |  5 ++-
 drivers/gpu/nvgpu/gm20b/fifo_gm20b.c             |  3 +-
 drivers/gpu/nvgpu/include/nvgpu/barrier.h        | 40 ++++++++++++++++++
 drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h  | 33 +++++++++++++++
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c               |  7 +--
 19 files changed, 171 insertions(+), 76 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/barrier.h
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index b00ecd31..f1de54c6 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -27,6 +27,7 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/kref.h>
 #include <nvgpu/log.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a/gk20a.h"
 #include "clk/clk_arb.h"
@@ -386,7 +387,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 		goto init_fail;
 	do {
 		/* Check that first run is completed */
-		smp_mb();
+		nvgpu_smp_mb();
 		wait_event_interruptible(arb->request_wq,
 			nvgpu_atomic_read(&arb->req_nr));
 	} while (!nvgpu_atomic_read(&arb->req_nr));
@@ -578,7 +579,7 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
 	/* make sure that the initialization of the pool is visible
 	 * before the update */
-	smp_wmb();
+	nvgpu_smp_wmb();
 	session->target = &session->target_pool[0];
 
 	init_llist_head(&session->targets);
@@ -706,7 +707,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 
 	table = ACCESS_ONCE(arb->current_vf_table);
 	/* make flag visible when all data has resolved in the tables */
-	smp_rmb();
+	nvgpu_smp_rmb();
 
 	table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
 		&arb->vf_table_pool[0];
@@ -980,7 +981,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 	}
 
 	/* make table visible when all data has resolved in the tables */
-	smp_wmb();
+	nvgpu_smp_wmb();
 	xchg(&arb->current_vf_table, table);
 
 exit_vf_table:
@@ -1077,7 +1078,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 					&session->target_pool[1] :
 					&session->target_pool[0];
 			/* Do not reorder pointer */
-			smp_rmb();
+			nvgpu_smp_rmb();
 			head = llist_del_all(&session->targets);
 			if (head) {
 
@@ -1102,7 +1103,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 					llist_add(&dev->node, &arb->requests);
 				}
 				/* Ensure target is updated before ptr sawp */
-				smp_wmb();
+				nvgpu_smp_wmb();
 				xchg(&session->target, target);
 			}
 
@@ -1148,7 +1149,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	if (pstate == VF_POINT_INVALID_PSTATE) {
 		arb->status = -EINVAL;
 		/* make status visible */
-		smp_mb();
+		nvgpu_smp_mb();
 		goto exit_arb;
 	}
 
@@ -1175,7 +1176,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 		nvgpu_mutex_release(&arb->pstate_lock);
 
 		/* make status visible */
-		smp_mb();
+		nvgpu_smp_mb();
 		goto exit_arb;
 	}
 	status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
@@ -1184,7 +1185,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 		nvgpu_mutex_release(&arb->pstate_lock);
 
 		/* make status visible */
-		smp_mb();
+		nvgpu_smp_mb();
 		goto exit_arb;
 	}
 
@@ -1196,7 +1197,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 		nvgpu_mutex_release(&arb->pstate_lock);
 
 		/* make status visible */
-		smp_mb();
+		nvgpu_smp_mb();
 		goto exit_arb;
 	}
 
@@ -1206,7 +1207,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 		nvgpu_mutex_release(&arb->pstate_lock);
 
 		/* make status visible */
-		smp_mb();
+		nvgpu_smp_mb();
 		goto exit_arb;
 	}
 
@@ -1216,7 +1217,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 		nvgpu_mutex_release(&arb->pstate_lock);
 
 		/* make status visible */
-		smp_mb();
+		nvgpu_smp_mb();
 		goto exit_arb;
 	}
 
@@ -1224,7 +1225,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 			&arb->actual_pool[1] : &arb->actual_pool[0];
 
 	/* do not reorder this pointer */
-	smp_rmb();
+	nvgpu_smp_rmb();
 	actual->gpc2clk = gpc2clk_target;
 	actual->mclk = mclk_target;
 	arb->voltuv_actual = voltuv;
@@ -1232,7 +1233,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	arb->status = status;
 
 	/* Make changes visible to other threads */
-	smp_wmb();
+	nvgpu_smp_wmb();
 	xchg(&arb->actual, actual);
 
 	status = nvgpu_lpwr_enable_pg(g, false);
@@ -1241,12 +1242,12 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 		nvgpu_mutex_release(&arb->pstate_lock);
 
 		/* make status visible */
-		smp_mb();
+		nvgpu_smp_mb();
 		goto exit_arb;
 	}
 
 	/* status must be visible before atomic inc */
-	smp_wmb();
+	nvgpu_smp_wmb();
 	nvgpu_atomic_inc(&arb->req_nr);
 
 	/* Unlock pstate change for PG */
@@ -1287,7 +1288,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 			(curr - debug->switch_avg) * (curr - prev_avg);
 	}
 	/* commit changes before exchanging debug pointer */
-	smp_wmb();
+	nvgpu_smp_wmb();
 	xchg(&arb->debug, debug);
 #endif
 
@@ -1687,7 +1688,7 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
 	do {
 		target = ACCESS_ONCE(session->target);
 		/* no reordering of this pointer */
-		smp_rmb();
+		nvgpu_smp_rmb();
 
 		switch (api_domain) {
 		case NVGPU_GPU_CLK_DOMAIN_MCLK:
@@ -1716,7 +1717,7 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
 	do {
 		actual = ACCESS_ONCE(arb->actual);
 		/* no reordering of this pointer */
-		smp_rmb();
+		nvgpu_smp_rmb();
 
 		switch (api_domain) {
 		case NVGPU_GPU_CLK_DOMAIN_MCLK:
@@ -1854,7 +1855,7 @@ static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 
 		table = ACCESS_ONCE(arb->current_vf_table);
 		/* pointer to table can be updated by callback */
-		smp_rmb();
+		nvgpu_smp_rmb();
 
 		if (!table)
 			continue;
@@ -2039,7 +2040,7 @@ static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
 
 	debug = ACCESS_ONCE(arb->debug);
 	/* Make copy of structure and ensure no reordering */
-	smp_rmb();
+	nvgpu_smp_rmb();
 	if (!debug)
 		return -EINVAL;
 
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index eae0475a..274e9c93 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -18,6 +18,7 @@
 #include <nvgpu/allocator.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/barrier.h>
 
 #include "bitmap_allocator_priv.h"
 
@@ -40,7 +41,7 @@ static int nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a)
 	struct nvgpu_bitmap_allocator *ba = a->priv;
 	int inited = ba->inited;
 
-	rmb();
+	nvgpu_smp_rmb();
 	return inited;
 }
 
@@ -408,7 +409,7 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 		goto fail;
 	}
 
-	wmb();
+	nvgpu_smp_wmb();
 	a->inited = true;
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 0ef94c10..3e305bb8 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -18,6 +18,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/log2.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a/mm_gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -1064,7 +1065,7 @@ static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a)
 	struct nvgpu_buddy_allocator *ba = a->priv;
 	int inited = ba->initialized;
 
-	rmb();
+	nvgpu_smp_rmb();
 	return inited;
 }
 
@@ -1289,7 +1290,7 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	if (err)
 		goto fail;
 
-	wmb();
+	nvgpu_smp_wmb();
 	a->initialized = 1;
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 73dff2c3..7f486d68 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -21,6 +21,7 @@
 #include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/page_allocator.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -164,8 +165,8 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 		return err;
 
 	/*
-	 * One mb() is done after all mapping operations. Don't need individual
-	 * barriers for each PD write.
+	 * One full barrier is done after all mapping operations. Don't need
+	 * individual barriers for each PD write.
 	 */
 	vm->pdb.mem->skip_wmb = true;
 
@@ -259,8 +260,8 @@ static int pd_allocate(struct vm_gk20a *vm,
 	}
 
 	/*
-	 * One mb() is done after all mapping operations. Don't need individual
-	 * barriers for each PD write.
+	 * One full barrier is done after all mapping operations. Don't need
+	 * individual barriers for each PD write.
 	 */
 	pd->mem->skip_wmb = true;
 
@@ -714,7 +715,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 							    attrs);
 
 	unmap_gmmu_pages(g, &vm->pdb);
-	mb();
+	nvgpu_mb();
 
 	__gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
 
@@ -983,7 +984,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
 	 * There probably also needs to be a TLB invalidate as well but we leave
 	 * that to the caller of this function.
 	 */
-	wmb();
+	nvgpu_wmb();
 
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
index eeb86095..8f712a14 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -17,6 +17,7 @@
 #include <nvgpu/atomic.h>
 #include <nvgpu/allocator.h>
 #include <nvgpu/kmem.h>
+#include <nvgpu/barrier.h>
 
 #include "lockless_allocator_priv.h"
 
@@ -39,7 +40,7 @@ static int nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a)
 	struct nvgpu_lockless_allocator *pa = a->priv;
 	int inited = pa->inited;
 
-	rmb();
+	nvgpu_smp_rmb();
 	return inited;
 }
 
@@ -198,7 +199,7 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	a->flags = flags;
 	nvgpu_atomic_set(&a->nr_allocs, 0);
 
-	wmb();
+	nvgpu_smp_wmb();
 	a->inited = true;
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c
index 58108722..63597d10 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu.c
@@ -16,6 +16,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a/gk20a.h"
 
@@ -394,7 +395,7 @@ void nvgpu_pmu_state_change(struct gk20a *g, u32 pmu_state,
 	}
 
 	/* make status visible */
-	smp_mb();
+	nvgpu_smp_mb();
 }
 
 static int nvgpu_pg_init_task(void *arg)
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c
index 935ae95a..b435f4a7 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c
@@ -14,6 +14,7 @@
 #include <nvgpu/pmu.h>
 #include <nvgpu/log.h>
 #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a/gk20a.h"
 
@@ -84,7 +85,7 @@ static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
 					true);
 				WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED);
 				/* make status visible */
-				smp_mb();
+				nvgpu_smp_mb();
 			} else
 				nvgpu_pmu_state_change(g, PMU_STATE_ELPG_BOOTED,
 					true);
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index f50fec13..3e979ebd 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -27,6 +27,7 @@
 #include <nvgpu/hw/gk20a/hw_top_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
+#include <nvgpu/barrier.h>
 
 static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
 {
@@ -654,7 +655,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 		/* take always the postfence as it is needed for protecting the ce context */
 		submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
 
-		wmb();
+		nvgpu_wmb();
 
 		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
 					1, submit_flags, &fence,
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5f81b441..0c1b06e9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -35,6 +35,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/ltc.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a.h"
 #include "ctxsw_trace_gk20a.h"
@@ -245,9 +246,9 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 
 		/*
 		 * ensure put is read before any subsequent reads.
-		 * see corresponding wmb in gk20a_channel_add_job()
+		 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
 		 */
-		rmb();
+		nvgpu_smp_rmb();
 
 		while (tmp_get != put) {
 			job = &ch->joblist.pre_alloc.jobs[tmp_get];
@@ -618,7 +619,7 @@ unbind:
 	/* make sure we catch accesses of unopened channels in case
 	 * there's non-refcounted channel pointers hanging around */
 	ch->g = NULL;
-	wmb();
+	nvgpu_smp_wmb();
 
 	/* ALWAYS last */
 	free_channel(f, ch);
@@ -880,7 +881,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 	 * gk20a_free_channel() */
 	ch->referenceable = true;
 	nvgpu_atomic_set(&ch->ref_count, 1);
-	wmb();
+	nvgpu_smp_wmb();
 
 	return ch;
 }
@@ -993,9 +994,9 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 
 	/*
 	 * commit the previous writes before making the entry valid.
-	 * see the corresponding rmb() in gk20a_free_priv_cmdbuf().
+	 * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf().
 	 */
-	wmb();
+	nvgpu_smp_wmb();
 
 	e->valid = true;
 	gk20a_dbg_fn("done");
@@ -1025,9 +1026,10 @@ static int channel_gk20a_alloc_job(struct channel_gk20a *c,
 
 		/*
 		 * ensure all subsequent reads happen after reading get.
-		 * see corresponding wmb in gk20a_channel_clean_up_jobs()
+		 * see corresponding nvgpu_smp_wmb in
+		 * gk20a_channel_clean_up_jobs()
 		 */
-		rmb();
+		nvgpu_smp_rmb();
 
 		if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
 			*job_out = &c->joblist.pre_alloc.jobs[put];
@@ -1137,7 +1139,7 @@ bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
 {
 	bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
 
-	rmb();
+	nvgpu_smp_rmb();
 	return pre_alloc_enabled;
 }
 
@@ -1194,9 +1196,10 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
 
 	/*
 	 * commit the previous writes before setting the flag.
-	 * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+	 * see corresponding nvgpu_smp_rmb in
+	 * channel_gk20a_is_prealloc_enabled()
 	 */
-	wmb();
+	nvgpu_smp_wmb();
 	c->joblist.pre_alloc.enabled = true;
 
 	return 0;
@@ -1218,9 +1221,10 @@ static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
 
 	/*
 	 * commit the previous writes before disabling the flag.
-	 * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+	 * see corresponding nvgpu_smp_rmb in
+	 * channel_gk20a_is_prealloc_enabled()
 	 */
-	wmb();
+	nvgpu_smp_wmb();
 	c->joblist.pre_alloc.enabled = false;
 }
 
@@ -1741,8 +1745,8 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g)
 	/*
 	 * Currently, the only work type is associated with a lock, which deals
 	 * with any necessary barriers. If a work type with no locking were
-	 * added, a a wmb() would be needed here. See ..worker_pending() for a
-	 * pair.
+	 * added, a nvgpu_smp_wmb() would be needed here. See
+	 * ..worker_pending() for a pair.
 	 */
 
 	put = nvgpu_atomic_inc_return(&g->channel_worker.put);
@@ -1764,8 +1768,9 @@ static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
 	bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get;
 
 	/*
-	 * This would be the place for a rmb() pairing a wmb() for a wakeup
-	 * if we had any work with no implicit barriers caused by locking.
+	 * This would be the place for a nvgpu_smp_rmb() pairing
+	 * a nvgpu_smp_wmb() for a wakeup if we had any work with
+	 * no implicit barriers caused by locking.
 	 */
 
 	return pending;
@@ -1939,7 +1944,7 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
 
 	if (e->valid) {
 		/* read the entry's valid flag before reading its contents */
-		rmb();
+		nvgpu_smp_rmb();
 		if ((q->get != e->off) && e->off != 0)
 			nvgpu_err(g, "requests out-of-order, ch=%d",
 				  c->chid);
@@ -1984,10 +1989,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 
 		/*
 		 * ensure all pending write complete before adding to the list.
-		 * see corresponding rmb in gk20a_channel_clean_up_jobs() &
+		 * see corresponding nvgpu_smp_rmb in
+		 * gk20a_channel_clean_up_jobs() &
 		 * gk20a_channel_abort_clean_up()
 		 */
-		wmb();
+		nvgpu_smp_wmb();
 		channel_gk20a_joblist_add(c, job);
 
 		if (!pre_alloc_enabled)
@@ -2061,10 +2067,10 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 		/*
 		 * ensure that all subsequent reads occur after checking
-		 * that we have a valid node. see corresponding wmb in
+		 * that we have a valid node. see corresponding nvgpu_smp_wmb in
 		 * gk20a_channel_add_job().
 		 */
-		rmb();
+		nvgpu_smp_rmb();
 		job = channel_gk20a_joblist_peek(c);
 		channel_gk20a_joblist_unlock(c);
 
@@ -2127,9 +2133,9 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 		/*
 		 * ensure all pending writes complete before freeing up the job.
-		 * see corresponding rmb in channel_gk20a_alloc_job().
+		 * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job().
 		 */
-		wmb();
+		nvgpu_smp_wmb();
 
 		channel_gk20a_free_job(c, job);
 		job_finished = 1;
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 546917f1..91c3b206 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -29,6 +29,7 @@
 
 #include <nvgpu/log.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>
 
 #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
@@ -635,7 +636,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g,
 	dev->ents[write_idx] = *entry;
 
 	/* ensure record is written before updating write index */
-	smp_wmb();
+	nvgpu_smp_wmb();
 
 	write_idx++;
 	if (unlikely(write_idx >= hdr->num_ents))
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 5ee90440..fea3b0fa 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -22,6 +22,7 @@
 #include <nvgpu/hashtable.h>
 #include <nvgpu/circ_buf.h>
 #include <nvgpu/thread.h>
+#include <nvgpu/barrier.h>
 
 #include "ctxsw_trace_gk20a.h"
 #include "fecs_trace_gk20a.h"
@@ -370,7 +371,7 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
 	}
 
 	/* ensure FECS records has been updated before incrementing read index */
-	wmb();
+	nvgpu_wmb();
 	gk20a_fecs_trace_set_read_index(g, read);
 
 done:
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 8ad24c44..5fa9a0df 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -21,6 +21,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a.h"
 #include "channel_gk20a.h"
@@ -73,7 +74,7 @@ static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
 {
 	bool valid = f->valid;
 
-	rmb();
+	nvgpu_smp_rmb();
 	return valid;
 }
 
@@ -252,7 +253,7 @@ int gk20a_fence_from_semaphore(
 	f->semaphore_wq = semaphore_wq;
 
 	/* commit previous writes before setting the valid flag */
-	wmb();
+	nvgpu_smp_wmb();
 	f->valid = true;
 
 	return 0;
@@ -327,7 +328,7 @@ int gk20a_fence_from_syncpt(
 	f->syncpt_value = value;
 
 	/* commit previous writes before setting the valid flag */
-	wmb();
+	nvgpu_smp_wmb();
 	f->valid = true;
 
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 47e7d82e..fd249bc9 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -30,6 +30,7 @@
 #include <nvgpu/log2.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a.h"
 #include "ctxsw_trace_gk20a.h"
@@ -966,7 +967,7 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g)
 		v = gk20a_bar1_readl(g, bar1_vaddr);
 
 		*cpu_vaddr = v1;
-		smp_mb();
+		nvgpu_smp_mb();
 
 		if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
 			nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \
@@ -1309,7 +1310,7 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
 	if (refch) {
 		/* mark channel as faulted */
 		refch->has_timedout = true;
-		wmb();
+		nvgpu_smp_wmb();
 		/* unblock pending waits */
 		nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
 		nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 19ea76cb..ab2d0b7f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -49,6 +49,7 @@ struct nvgpu_cpu_time_correlation_sample;
 #include <nvgpu/falcon.h>
 #include <nvgpu/pmu.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>
 
 #include "clk_gk20a.h"
 #include "ce2_gk20a.h"
@@ -1324,7 +1325,7 @@ static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v)
 		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
 	} else {
 		writel_relaxed(v, g->regs + r);
-		wmb();
+		nvgpu_wmb();
 		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
 	}
 }
@@ -1351,7 +1352,7 @@ static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v)
 		__gk20a_warn_on_no_regs();
 		gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
 	} else {
-		wmb();
+		nvgpu_wmb();
 		do {
 			writel_relaxed(v, g->regs + r);
 		} while (readl(g->regs + r) != v);
@@ -1365,7 +1366,7 @@ static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v)
 		__gk20a_warn_on_no_regs();
 		gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
 	} else {
-		wmb();
+		nvgpu_wmb();
 		writel_relaxed(v, g->bar1 + b);
 		gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cd1d31a5..27442947 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -31,6 +31,7 @@
 #include <nvgpu/firmware.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
+#include <nvgpu/barrier.h>
 
 #include "gk20a.h"
 #include "kind_gk20a.h"
@@ -554,8 +555,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
 	gk20a_writel(g, gr_fecs_method_push_r(),
 		gr_fecs_method_push_adr_f(op.method.addr));
 
-	/* op.mb.id == 4 cases require waiting for completion on
-	 * for op.mb.id == 0 */
+	/* op.mailbox.id == 4 cases require waiting for completion on
+	 * op.mailbox.id == 0 */
 	if (op.mailbox.id == 4)
 		op.mailbox.id = 0;
 
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index e688c863..8e913f23 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -21,6 +21,7 @@
 #include <nvgpu/timers.h>
 #include <nvgpu/log.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>
 
 #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -50,7 +51,7 @@ void channel_gm20b_bind(struct channel_gk20a *c)
 		(gk20a_readl(g, ccsr_channel_r(c->chid)) &
 		 ~ccsr_channel_enable_set_f(~0)) |
 		 ccsr_channel_enable_set_true_f());
-	wmb();
+	nvgpu_wmb();
 	nvgpu_atomic_set(&c->bound, true);
 }
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/barrier.h b/drivers/gpu/nvgpu/include/nvgpu/barrier.h
new file mode 100644
index 00000000..26eec3ed
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/barrier.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This file contains nvgpu_* high-level abstractions for the various
+ * memory-barrier operations available in the Linux kernel. Every OS
+ * should provide its own OS-specific implementation under this common API.
+ */
+
+#ifndef __NVGPU_BARRIER_H__
+#define __NVGPU_BARRIER_H__
+
+#ifdef __KERNEL__
+#include <nvgpu/linux/barrier.h>
+#endif
+
+#define nvgpu_mb()	__nvgpu_mb()
+#define nvgpu_rmb()	__nvgpu_rmb()
+#define nvgpu_wmb()	__nvgpu_wmb()
+
+#define nvgpu_smp_mb()	__nvgpu_smp_mb()
+#define nvgpu_smp_rmb()	__nvgpu_smp_rmb()
+#define nvgpu_smp_wmb()	__nvgpu_smp_wmb()
+
+#define nvgpu_read_barrier_depends() __nvgpu_read_barrier_depends()
+#define nvgpu_smp_read_barrier_depends() __nvgpu_smp_read_barrier_depends()
+
+#endif /* __NVGPU_BARRIER_H__ */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h b/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h
new file mode 100644
index 00000000..e7b83ee8
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __NVGPU_BARRIER_LINUX_H__
+#define __NVGPU_BARRIER_LINUX_H__
+
+#include <asm/barrier.h>
+
+#define __nvgpu_mb()	mb()
+#define __nvgpu_rmb()	rmb()
+#define __nvgpu_wmb()	wmb()
+
+#define __nvgpu_smp_mb()	smp_mb()
+#define __nvgpu_smp_rmb()	smp_rmb()
+#define __nvgpu_smp_wmb()	smp_wmb()
+
+#define __nvgpu_read_barrier_depends()	read_barrier_depends()
+#define __nvgpu_smp_read_barrier_depends()	smp_read_barrier_depends()
+
+#endif /* __NVGPU_BARRIER_LINUX_H__ */
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index c8519905..3f03e25a 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -20,6 +20,7 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/atomic.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/barrier.h>
 
 #include "vgpu/vgpu.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
@@ -42,7 +43,7 @@ static void vgpu_channel_bind(struct channel_gk20a *ch)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	wmb();
+	nvgpu_smp_wmb();
 	nvgpu_atomic_set(&ch->bound, true);
 }
 
@@ -370,7 +371,7 @@ static int vgpu_init_fifo_setup_hw(struct gk20a *g)
 		v = gk20a_bar1_readl(g, bar1_vaddr);
 
 		*cpu_vaddr = v1;
-		smp_mb();
+		nvgpu_smp_mb();
 
 		if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
 			nvgpu_err(g, "bar1 broken @ gk20a!");
@@ -728,7 +729,7 @@ static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g,
 
 	/* mark channel as faulted */
 	ch->has_timedout = true;
-	wmb();
+	nvgpu_smp_wmb();
 	/* unblock pending waits */
 	nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
 	nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
-- 
cgit v1.2.2