gpu: nvgpu: Nvgpu abstraction for linux barriers.

Construct wrapper nvgpu_* methods to replace mb, rmb, wmb, smp_mb, smp_rmb, smp_wmb, read_barrier_depends and smp_read_barrier_depends. NVGPU-122 Change-Id: I8d24dd70fef5cb0fadaacc15f3ab11531667a0df Signed-off-by: Debarshi <ddutta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1541199 Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Sourab Gupta <sourabg@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
author: Debarshi Dutta <ddutta@nvidia.com> 2017-08-18 06:52:29 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-08-22 06:53:51 -0400
commit: 81868a187fa3b217368206f17b19309846e8e7fb (patch)
tree: 2b59e33b61cc6e206f7781f3b4ab44c5c7b6d721 /drivers/gpu/nvgpu/gk20a
parent: 5f010177de985c901c33c914efe70a8498a5974f (diff)
8 files changed, 50 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index f50fec13..3e979ebd 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -27,6 +27,7 @@
 #include <nvgpu/hw/gk20a/hw_top_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
+#include <nvgpu/barrier.h>
 static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
 {
@@ -654,7 +655,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
                /* take always the postfence as it is needed for protecting the ce context */
                submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
-                wmb();
+                nvgpu_smp_wmb();
                ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
                                        1, submit_flags, &fence,
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5f81b441..0c1b06e9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -35,6 +35,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/ltc.h>
+#include <nvgpu/barrier.h>
 #include "gk20a.h"
 #include "ctxsw_trace_gk20a.h"
@@ -245,9 +246,9 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
                /*
                 * ensure put is read before any subsequent reads.
-                 * see corresponding wmb in gk20a_channel_add_job()
+                 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
                 */
-                rmb();
+                nvgpu_smp_rmb();
                while (tmp_get != put) {
                        job = &ch->joblist.pre_alloc.jobs[tmp_get];
@@ -618,7 +619,7 @@ unbind:
        /* make sure we catch accesses of unopened channels in case
         * there's non-refcounted channel pointers hanging around */
        ch->g = NULL;
-        wmb();
+        nvgpu_smp_wmb();
        /* ALWAYS last */
        free_channel(f, ch);
@@ -880,7 +881,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
         * gk20a_free_channel() */
        ch->referenceable = true;
        nvgpu_atomic_set(&ch->ref_count, 1);
-        wmb();
+        nvgpu_smp_wmb();
        return ch;
 }
@@ -993,9 +994,9 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
        /*
         * commit the previous writes before making the entry valid.
-         * see the corresponding rmb() in gk20a_free_priv_cmdbuf().
+         * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf().
         */
-        wmb();
+        nvgpu_smp_wmb();
        e->valid = true;
        gk20a_dbg_fn("done");
@@ -1025,9 +1026,10 @@ static int channel_gk20a_alloc_job(struct channel_gk20a *c,
                /*
                 * ensure all subsequent reads happen after reading get.
-                 * see corresponding wmb in gk20a_channel_clean_up_jobs()
+                 * see corresponding nvgpu_smp_wmb in
+                 * gk20a_channel_clean_up_jobs()
                 */
-                rmb();
+                nvgpu_smp_rmb();
                if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
                        *job_out = &c->joblist.pre_alloc.jobs[put];
@@ -1137,7 +1139,7 @@ bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
 {
        bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
-        rmb();
+        nvgpu_smp_rmb();
        return pre_alloc_enabled;
 }
@@ -1194,9 +1196,10 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
        /*
         * commit the previous writes before setting the flag.
-         * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+         * see corresponding nvgpu_smp_rmb in
+         * channel_gk20a_is_prealloc_enabled()
         */
-        wmb();
+        nvgpu_smp_wmb();
        c->joblist.pre_alloc.enabled = true;
        return 0;
@@ -1218,9 +1221,10 @@ static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
        /*
         * commit the previous writes before disabling the flag.
-         * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+         * see corresponding nvgpu_smp_rmb in
+         * channel_gk20a_is_prealloc_enabled()
         */
-        wmb();
+        nvgpu_smp_wmb();
        c->joblist.pre_alloc.enabled = false;
 }
@@ -1741,8 +1745,8 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g)
        /*
         * Currently, the only work type is associated with a lock, which deals
         * with any necessary barriers. If a work type with no locking were
-         * added, a a wmb() would be needed here. See ..worker_pending() for a
+         * added, a nvgpu_smp_wmb() would be needed here. See
-         * pair.
+         * ..worker_pending() for a pair.
         */
        put = nvgpu_atomic_inc_return(&g->channel_worker.put);
@@ -1764,8 +1768,9 @@ static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
        bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get;
        /*
-         * This would be the place for a rmb() pairing a wmb() for a wakeup
+         * This would be the place for a nvgpu_smp_rmb() pairing
-         * if we had any work with no implicit barriers caused by locking.
+         * a nvgpu_smp_wmb() for a wakeup if we had any work with
+         * no implicit barriers caused by locking.
         */
        return pending;
@@ -1939,7 +1944,7 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
        if (e->valid) {
                /* read the entry's valid flag before reading its contents */
-                rmb();
+                nvgpu_smp_rmb();
                if ((q->get != e->off) && e->off != 0)
                        nvgpu_err(g, "requests out-of-order, ch=%d",
                                  c->chid);
@@ -1984,10 +1989,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
                /*
                 * ensure all pending write complete before adding to the list.
-                 * see corresponding rmb in gk20a_channel_clean_up_jobs() &
+                 * see corresponding nvgpu_smp_rmb in
+                 * gk20a_channel_clean_up_jobs() &
                 * gk20a_channel_abort_clean_up()
                 */
-                wmb();
+                nvgpu_smp_wmb();
                channel_gk20a_joblist_add(c, job);
                if (!pre_alloc_enabled)
@@ -2061,10 +2067,10 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
                /*
                 * ensure that all subsequent reads occur after checking
-                 * that we have a valid node. see corresponding wmb in
+                 * that we have a valid node. see corresponding nvgpu_smp_wmb in
                 * gk20a_channel_add_job().
                 */
-                rmb();
+                nvgpu_smp_rmb();
                job = channel_gk20a_joblist_peek(c);
                channel_gk20a_joblist_unlock(c);
@@ -2127,9 +2133,9 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
                /*
                 * ensure all pending writes complete before freeing up the job.
-                 * see corresponding rmb in channel_gk20a_alloc_job().
+                 * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job().
                 */
-                wmb();
+                nvgpu_smp_wmb();
                channel_gk20a_free_job(c, job);
                job_finished = 1;
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 546917f1..91c3b206 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -29,6 +29,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>
 #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
@@ -635,7 +636,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g,
        dev->ents[write_idx] = *entry;
        /* ensure record is written before updating write index */
-        smp_wmb();
+        nvgpu_smp_wmb();
        write_idx++;
        if (unlikely(write_idx >= hdr->num_ents))
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 5ee90440..fea3b0fa 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -22,6 +22,7 @@
 #include <nvgpu/hashtable.h>
 #include <nvgpu/circ_buf.h>
 #include <nvgpu/thread.h>
+#include <nvgpu/barrier.h>
 #include "ctxsw_trace_gk20a.h"
 #include "fecs_trace_gk20a.h"
@@ -370,7 +371,7 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
        }
        /* ensure FECS records has been updated before incrementing read index */
-        wmb();
+        nvgpu_smp_wmb();
        gk20a_fecs_trace_set_read_index(g, read);
 done:
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 8ad24c44..5fa9a0df 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -21,6 +21,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/barrier.h>
 #include "gk20a.h"
 #include "channel_gk20a.h"
@@ -73,7 +74,7 @@ static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
 {
        bool valid = f->valid;
-        rmb();
+        nvgpu_smp_rmb();
        return valid;
 }
@@ -252,7 +253,7 @@ int gk20a_fence_from_semaphore(
        f->semaphore_wq = semaphore_wq;
        /* commit previous writes before setting the valid flag */
-        wmb();
+        nvgpu_smp_wmb();
        f->valid = true;
        return 0;
@@ -327,7 +328,7 @@ int gk20a_fence_from_syncpt(
        f->syncpt_value = value;
        /* commit previous writes before setting the valid flag */
-        wmb();
+        nvgpu_smp_wmb();
        f->valid = true;
        return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 47e7d82e..fd249bc9 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -30,6 +30,7 @@
 #include <nvgpu/log2.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/barrier.h>
 #include "gk20a.h"
 #include "ctxsw_trace_gk20a.h"
@@ -966,7 +967,7 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g)
                v = gk20a_bar1_readl(g, bar1_vaddr);
                *cpu_vaddr = v1;
-                smp_mb();
+                nvgpu_smp_mb();
                if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
                        nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \
@@ -1309,7 +1310,7 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
        if (refch) {
                /* mark channel as faulted */
                refch->has_timedout = true;
-                wmb();
+                nvgpu_smp_wmb();
                /* unblock pending waits */
                nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
                nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 19ea76cb..ab2d0b7f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -49,6 +49,7 @@ struct nvgpu_cpu_time_correlation_sample;
 #include <nvgpu/falcon.h>
 #include <nvgpu/pmu.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/barrier.h>
 #include "clk_gk20a.h"
 #include "ce2_gk20a.h"
@@ -1324,7 +1325,7 @@ static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v)
                gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
        } else {
                writel_relaxed(v, g->regs + r);
-                wmb();
+                nvgpu_smp_wmb();
                gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
        }
 }
@@ -1351,7 +1352,7 @@ static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v)
                __gk20a_warn_on_no_regs();
                gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
        } else {
-                wmb();
+                nvgpu_smp_wmb();
                do {
                        writel_relaxed(v, g->regs + r);
                } while (readl(g->regs + r) != v);
@@ -1365,7 +1366,7 @@ static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v)
                __gk20a_warn_on_no_regs();
                gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
        } else {
-                wmb();
+                nvgpu_smp_wmb();
                writel_relaxed(v, g->bar1 + b);
                gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
        }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cd1d31a5..27442947 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -31,6 +31,7 @@
 #include <nvgpu/firmware.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
+#include <nvgpu/barrier.h>
 #include "gk20a.h"
 #include "kind_gk20a.h"
@@ -554,8 +555,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
        gk20a_writel(g, gr_fecs_method_push_r(),
                gr_fecs_method_push_adr_f(op.method.addr));
-        /* op.mb.id == 4 cases require waiting for completion on
+        /* op.mailbox.id == 4 cases require waiting for completion on
-         * for op.mb.id == 0 */
+         * for op.mailbox.id == 0 */
        if (op.mailbox.id == 4)
                op.mailbox.id = 0;
author	Debarshi Dutta <ddutta@nvidia.com>	2017-08-18 06:52:29 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-08-22 06:53:51 -0400
commit	81868a187fa3b217368206f17b19309846e8e7fb (patch)
tree	2b59e33b61cc6e206f7781f3b4ab44c5c7b6d721 /drivers/gpu/nvgpu/gk20a
parent	5f010177de985c901c33c914efe70a8498a5974f (diff)