summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
author: Debarshi Dutta <ddutta@nvidia.com> 2017-08-18 06:52:29 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-08-22 06:53:51 -0400
commit: 81868a187fa3b217368206f17b19309846e8e7fb (patch)
tree: 2b59e33b61cc6e206f7781f3b4ab44c5c7b6d721 /drivers/gpu/nvgpu/gk20a
parent: 5f010177de985c901c33c914efe70a8498a5974f (diff)
gpu: nvgpu: Nvgpu abstraction for linux barriers.
Construct wrapper nvgpu_* methods to replace mb, rmb, wmb, smp_mb, smp_rmb, smp_wmb, read_barrier_depends and smp_read_barrier_depends.

NVGPU-122

Change-Id: I8d24dd70fef5cb0fadaacc15f3ab11531667a0df
Signed-off-by: Debarshi <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541199
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 54
-rw-r--r-- drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 7
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 5
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h | 7
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5
8 files changed, 50 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index f50fec13..3e979ebd 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -27,6 +27,7 @@
27#include <nvgpu/hw/gk20a/hw_top_gk20a.h> 27#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
28#include <nvgpu/hw/gk20a/hw_mc_gk20a.h> 28#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
29#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> 29#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
30#include <nvgpu/barrier.h>
30 31
31static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) 32static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
32{ 33{
@@ -654,7 +655,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
654 /* take always the postfence as it is needed for protecting the ce context */ 655 /* take always the postfence as it is needed for protecting the ce context */
655 submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; 656 submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
656 657
657 wmb(); 658 nvgpu_smp_wmb();
658 659
659 ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, 660 ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
660 1, submit_flags, &fence, 661 1, submit_flags, &fence,
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5f81b441..0c1b06e9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -35,6 +35,7 @@
35#include <nvgpu/enabled.h> 35#include <nvgpu/enabled.h>
36#include <nvgpu/debug.h> 36#include <nvgpu/debug.h>
37#include <nvgpu/ltc.h> 37#include <nvgpu/ltc.h>
38#include <nvgpu/barrier.h>
38 39
39#include "gk20a.h" 40#include "gk20a.h"
40#include "ctxsw_trace_gk20a.h" 41#include "ctxsw_trace_gk20a.h"
@@ -245,9 +246,9 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
245 246
246 /* 247 /*
247 * ensure put is read before any subsequent reads. 248 * ensure put is read before any subsequent reads.
248 * see corresponding wmb in gk20a_channel_add_job() 249 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
249 */ 250 */
250 rmb(); 251 nvgpu_smp_rmb();
251 252
252 while (tmp_get != put) { 253 while (tmp_get != put) {
253 job = &ch->joblist.pre_alloc.jobs[tmp_get]; 254 job = &ch->joblist.pre_alloc.jobs[tmp_get];
@@ -618,7 +619,7 @@ unbind:
618 /* make sure we catch accesses of unopened channels in case 619 /* make sure we catch accesses of unopened channels in case
619 * there's non-refcounted channel pointers hanging around */ 620 * there's non-refcounted channel pointers hanging around */
620 ch->g = NULL; 621 ch->g = NULL;
621 wmb(); 622 nvgpu_smp_wmb();
622 623
623 /* ALWAYS last */ 624 /* ALWAYS last */
624 free_channel(f, ch); 625 free_channel(f, ch);
@@ -880,7 +881,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
880 * gk20a_free_channel() */ 881 * gk20a_free_channel() */
881 ch->referenceable = true; 882 ch->referenceable = true;
882 nvgpu_atomic_set(&ch->ref_count, 1); 883 nvgpu_atomic_set(&ch->ref_count, 1);
883 wmb(); 884 nvgpu_smp_wmb();
884 885
885 return ch; 886 return ch;
886} 887}
@@ -993,9 +994,9 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
993 994
994 /* 995 /*
995 * commit the previous writes before making the entry valid. 996 * commit the previous writes before making the entry valid.
996 * see the corresponding rmb() in gk20a_free_priv_cmdbuf(). 997 * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf().
997 */ 998 */
998 wmb(); 999 nvgpu_smp_wmb();
999 1000
1000 e->valid = true; 1001 e->valid = true;
1001 gk20a_dbg_fn("done"); 1002 gk20a_dbg_fn("done");
@@ -1025,9 +1026,10 @@ static int channel_gk20a_alloc_job(struct channel_gk20a *c,
1025 1026
1026 /* 1027 /*
1027 * ensure all subsequent reads happen after reading get. 1028 * ensure all subsequent reads happen after reading get.
1028 * see corresponding wmb in gk20a_channel_clean_up_jobs() 1029 * see corresponding nvgpu_smp_wmb in
1030 * gk20a_channel_clean_up_jobs()
1029 */ 1031 */
1030 rmb(); 1032 nvgpu_smp_rmb();
1031 1033
1032 if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) 1034 if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
1033 *job_out = &c->joblist.pre_alloc.jobs[put]; 1035 *job_out = &c->joblist.pre_alloc.jobs[put];
@@ -1137,7 +1139,7 @@ bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
1137{ 1139{
1138 bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; 1140 bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
1139 1141
1140 rmb(); 1142 nvgpu_smp_rmb();
1141 return pre_alloc_enabled; 1143 return pre_alloc_enabled;
1142} 1144}
1143 1145
@@ -1194,9 +1196,10 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
1194 1196
1195 /* 1197 /*
1196 * commit the previous writes before setting the flag. 1198 * commit the previous writes before setting the flag.
1197 * see corresponding rmb in channel_gk20a_is_prealloc_enabled() 1199 * see corresponding nvgpu_smp_rmb in
1200 * channel_gk20a_is_prealloc_enabled()
1198 */ 1201 */
1199 wmb(); 1202 nvgpu_smp_wmb();
1200 c->joblist.pre_alloc.enabled = true; 1203 c->joblist.pre_alloc.enabled = true;
1201 1204
1202 return 0; 1205 return 0;
@@ -1218,9 +1221,10 @@ static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
1218 1221
1219 /* 1222 /*
1220 * commit the previous writes before disabling the flag. 1223 * commit the previous writes before disabling the flag.
1221 * see corresponding rmb in channel_gk20a_is_prealloc_enabled() 1224 * see corresponding nvgpu_smp_rmb in
1225 * channel_gk20a_is_prealloc_enabled()
1222 */ 1226 */
1223 wmb(); 1227 nvgpu_smp_wmb();
1224 c->joblist.pre_alloc.enabled = false; 1228 c->joblist.pre_alloc.enabled = false;
1225} 1229}
1226 1230
@@ -1741,8 +1745,8 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g)
1741 /* 1745 /*
1742 * Currently, the only work type is associated with a lock, which deals 1746 * Currently, the only work type is associated with a lock, which deals
1743 * with any necessary barriers. If a work type with no locking were 1747 * with any necessary barriers. If a work type with no locking were
1744 * added, a a wmb() would be needed here. See ..worker_pending() for a 1748 * added, a nvgpu_smp_wmb() would be needed here. See
1745 * pair. 1749 * ..worker_pending() for a pair.
1746 */ 1750 */
1747 1751
1748 put = nvgpu_atomic_inc_return(&g->channel_worker.put); 1752 put = nvgpu_atomic_inc_return(&g->channel_worker.put);
@@ -1764,8 +1768,9 @@ static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
1764 bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; 1768 bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get;
1765 1769
1766 /* 1770 /*
1767 * This would be the place for a rmb() pairing a wmb() for a wakeup 1771 * This would be the place for a nvgpu_smp_rmb() pairing
1768 * if we had any work with no implicit barriers caused by locking. 1772 * a nvgpu_smp_wmb() for a wakeup if we had any work with
1773 * no implicit barriers caused by locking.
1769 */ 1774 */
1770 1775
1771 return pending; 1776 return pending;
@@ -1939,7 +1944,7 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
1939 1944
1940 if (e->valid) { 1945 if (e->valid) {
1941 /* read the entry's valid flag before reading its contents */ 1946 /* read the entry's valid flag before reading its contents */
1942 rmb(); 1947 nvgpu_smp_rmb();
1943 if ((q->get != e->off) && e->off != 0) 1948 if ((q->get != e->off) && e->off != 0)
1944 nvgpu_err(g, "requests out-of-order, ch=%d", 1949 nvgpu_err(g, "requests out-of-order, ch=%d",
1945 c->chid); 1950 c->chid);
@@ -1984,10 +1989,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1984 1989
1985 /* 1990 /*
1986 * ensure all pending write complete before adding to the list. 1991 * ensure all pending write complete before adding to the list.
1987 * see corresponding rmb in gk20a_channel_clean_up_jobs() & 1992 * see corresponding nvgpu_smp_rmb in
1993 * gk20a_channel_clean_up_jobs() &
1988 * gk20a_channel_abort_clean_up() 1994 * gk20a_channel_abort_clean_up()
1989 */ 1995 */
1990 wmb(); 1996 nvgpu_smp_wmb();
1991 channel_gk20a_joblist_add(c, job); 1997 channel_gk20a_joblist_add(c, job);
1992 1998
1993 if (!pre_alloc_enabled) 1999 if (!pre_alloc_enabled)
@@ -2061,10 +2067,10 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
2061 2067
2062 /* 2068 /*
2063 * ensure that all subsequent reads occur after checking 2069 * ensure that all subsequent reads occur after checking
2064 * that we have a valid node. see corresponding wmb in 2070 * that we have a valid node. see corresponding nvgpu_smp_wmb in
2065 * gk20a_channel_add_job(). 2071 * gk20a_channel_add_job().
2066 */ 2072 */
2067 rmb(); 2073 nvgpu_smp_rmb();
2068 job = channel_gk20a_joblist_peek(c); 2074 job = channel_gk20a_joblist_peek(c);
2069 channel_gk20a_joblist_unlock(c); 2075 channel_gk20a_joblist_unlock(c);
2070 2076
@@ -2127,9 +2133,9 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
2127 2133
2128 /* 2134 /*
2129 * ensure all pending writes complete before freeing up the job. 2135 * ensure all pending writes complete before freeing up the job.
2130 * see corresponding rmb in channel_gk20a_alloc_job(). 2136 * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job().
2131 */ 2137 */
2132 wmb(); 2138 nvgpu_smp_wmb();
2133 2139
2134 channel_gk20a_free_job(c, job); 2140 channel_gk20a_free_job(c, job);
2135 job_finished = 1; 2141 job_finished = 1;
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 546917f1..91c3b206 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -29,6 +29,7 @@
29 29
30#include <nvgpu/log.h> 30#include <nvgpu/log.h>
31#include <nvgpu/atomic.h> 31#include <nvgpu/atomic.h>
32#include <nvgpu/barrier.h>
32 33
33#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> 34#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
34#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> 35#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
@@ -635,7 +636,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g,
635 dev->ents[write_idx] = *entry; 636 dev->ents[write_idx] = *entry;
636 637
637 /* ensure record is written before updating write index */ 638 /* ensure record is written before updating write index */
638 smp_wmb(); 639 nvgpu_smp_wmb();
639 640
640 write_idx++; 641 write_idx++;
641 if (unlikely(write_idx >= hdr->num_ents)) 642 if (unlikely(write_idx >= hdr->num_ents))
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 5ee90440..fea3b0fa 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -22,6 +22,7 @@
22#include <nvgpu/hashtable.h> 22#include <nvgpu/hashtable.h>
23#include <nvgpu/circ_buf.h> 23#include <nvgpu/circ_buf.h>
24#include <nvgpu/thread.h> 24#include <nvgpu/thread.h>
25#include <nvgpu/barrier.h>
25 26
26#include "ctxsw_trace_gk20a.h" 27#include "ctxsw_trace_gk20a.h"
27#include "fecs_trace_gk20a.h" 28#include "fecs_trace_gk20a.h"
@@ -370,7 +371,7 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
370 } 371 }
371 372
372 /* ensure FECS records has been updated before incrementing read index */ 373 /* ensure FECS records has been updated before incrementing read index */
373 wmb(); 374 nvgpu_smp_wmb();
374 gk20a_fecs_trace_set_read_index(g, read); 375 gk20a_fecs_trace_set_read_index(g, read);
375 376
376done: 377done:
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 8ad24c44..5fa9a0df 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -21,6 +21,7 @@
21#include <nvgpu/kmem.h> 21#include <nvgpu/kmem.h>
22#include <nvgpu/soc.h> 22#include <nvgpu/soc.h>
23#include <nvgpu/nvhost.h> 23#include <nvgpu/nvhost.h>
24#include <nvgpu/barrier.h>
24 25
25#include "gk20a.h" 26#include "gk20a.h"
26#include "channel_gk20a.h" 27#include "channel_gk20a.h"
@@ -73,7 +74,7 @@ static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
73{ 74{
74 bool valid = f->valid; 75 bool valid = f->valid;
75 76
76 rmb(); 77 nvgpu_smp_rmb();
77 return valid; 78 return valid;
78} 79}
79 80
@@ -252,7 +253,7 @@ int gk20a_fence_from_semaphore(
252 f->semaphore_wq = semaphore_wq; 253 f->semaphore_wq = semaphore_wq;
253 254
254 /* commit previous writes before setting the valid flag */ 255 /* commit previous writes before setting the valid flag */
255 wmb(); 256 nvgpu_smp_wmb();
256 f->valid = true; 257 f->valid = true;
257 258
258 return 0; 259 return 0;
@@ -327,7 +328,7 @@ int gk20a_fence_from_syncpt(
327 f->syncpt_value = value; 328 f->syncpt_value = value;
328 329
329 /* commit previous writes before setting the valid flag */ 330 /* commit previous writes before setting the valid flag */
330 wmb(); 331 nvgpu_smp_wmb();
331 f->valid = true; 332 f->valid = true;
332 333
333 return 0; 334 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 47e7d82e..fd249bc9 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -30,6 +30,7 @@
30#include <nvgpu/log2.h> 30#include <nvgpu/log2.h>
31#include <nvgpu/debug.h> 31#include <nvgpu/debug.h>
32#include <nvgpu/nvhost.h> 32#include <nvgpu/nvhost.h>
33#include <nvgpu/barrier.h>
33 34
34#include "gk20a.h" 35#include "gk20a.h"
35#include "ctxsw_trace_gk20a.h" 36#include "ctxsw_trace_gk20a.h"
@@ -966,7 +967,7 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g)
966 v = gk20a_bar1_readl(g, bar1_vaddr); 967 v = gk20a_bar1_readl(g, bar1_vaddr);
967 968
968 *cpu_vaddr = v1; 969 *cpu_vaddr = v1;
969 smp_mb(); 970 nvgpu_smp_mb();
970 971
971 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { 972 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
972 nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \ 973 nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \
@@ -1309,7 +1310,7 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g,
1309 if (refch) { 1310 if (refch) {
1310 /* mark channel as faulted */ 1311 /* mark channel as faulted */
1311 refch->has_timedout = true; 1312 refch->has_timedout = true;
1312 wmb(); 1313 nvgpu_smp_wmb();
1313 /* unblock pending waits */ 1314 /* unblock pending waits */
1314 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq); 1315 nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq);
1315 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq); 1316 nvgpu_cond_broadcast_interruptible(&refch->notifier_wq);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 19ea76cb..ab2d0b7f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -49,6 +49,7 @@ struct nvgpu_cpu_time_correlation_sample;
49#include <nvgpu/falcon.h> 49#include <nvgpu/falcon.h>
50#include <nvgpu/pmu.h> 50#include <nvgpu/pmu.h>
51#include <nvgpu/atomic.h> 51#include <nvgpu/atomic.h>
52#include <nvgpu/barrier.h>
52 53
53#include "clk_gk20a.h" 54#include "clk_gk20a.h"
54#include "ce2_gk20a.h" 55#include "ce2_gk20a.h"
@@ -1324,7 +1325,7 @@ static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v)
1324 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); 1325 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
1325 } else { 1326 } else {
1326 writel_relaxed(v, g->regs + r); 1327 writel_relaxed(v, g->regs + r);
1327 wmb(); 1328 nvgpu_smp_wmb();
1328 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v); 1329 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
1329 } 1330 }
1330} 1331}
@@ -1351,7 +1352,7 @@ static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v)
1351 __gk20a_warn_on_no_regs(); 1352 __gk20a_warn_on_no_regs();
1352 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); 1353 gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
1353 } else { 1354 } else {
1354 wmb(); 1355 nvgpu_smp_wmb();
1355 do { 1356 do {
1356 writel_relaxed(v, g->regs + r); 1357 writel_relaxed(v, g->regs + r);
1357 } while (readl(g->regs + r) != v); 1358 } while (readl(g->regs + r) != v);
@@ -1365,7 +1366,7 @@ static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v)
1365 __gk20a_warn_on_no_regs(); 1366 __gk20a_warn_on_no_regs();
1366 gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); 1367 gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
1367 } else { 1368 } else {
1368 wmb(); 1369 nvgpu_smp_wmb();
1369 writel_relaxed(v, g->bar1 + b); 1370 writel_relaxed(v, g->bar1 + b);
1370 gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v); 1371 gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
1371 } 1372 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cd1d31a5..27442947 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -31,6 +31,7 @@
31#include <nvgpu/firmware.h> 31#include <nvgpu/firmware.h>
32#include <nvgpu/enabled.h> 32#include <nvgpu/enabled.h>
33#include <nvgpu/debug.h> 33#include <nvgpu/debug.h>
34#include <nvgpu/barrier.h>
34 35
35#include "gk20a.h" 36#include "gk20a.h"
36#include "kind_gk20a.h" 37#include "kind_gk20a.h"
@@ -554,8 +555,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
554 gk20a_writel(g, gr_fecs_method_push_r(), 555 gk20a_writel(g, gr_fecs_method_push_r(),
555 gr_fecs_method_push_adr_f(op.method.addr)); 556 gr_fecs_method_push_adr_f(op.method.addr));
556 557
557 /* op.mb.id == 4 cases require waiting for completion on 558 /* op.mailbox.id == 4 cases require waiting for completion on
558 * for op.mb.id == 0 */ 559 * for op.mailbox.id == 0 */
559 if (op.mailbox.id == 4) 560 if (op.mailbox.id == 4)
560 op.mailbox.id = 0; 561 op.mailbox.id = 0;
561 562