diff options
author | Debarshi Dutta <ddutta@nvidia.com> | 2017-08-18 06:52:29 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-08-22 06:53:51 -0400 |
commit | 81868a187fa3b217368206f17b19309846e8e7fb (patch) | |
tree | 2b59e33b61cc6e206f7781f3b4ab44c5c7b6d721 /drivers/gpu/nvgpu/gk20a | |
parent | 5f010177de985c901c33c914efe70a8498a5974f (diff) |
gpu: nvgpu: Nvgpu abstraction for Linux barriers.
Construct wrapper nvgpu_* methods to replace
mb, rmb, wmb, smp_mb, smp_rmb, smp_wmb, read_barrier_depends and
smp_read_barrier_depends.
NVGPU-122
Change-Id: I8d24dd70fef5cb0fadaacc15f3ab11531667a0df
Signed-off-by: Debarshi <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541199
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 54 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5 |
8 files changed, 50 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index f50fec13..3e979ebd 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <nvgpu/hw/gk20a/hw_top_gk20a.h> | 27 | #include <nvgpu/hw/gk20a/hw_top_gk20a.h> |
28 | #include <nvgpu/hw/gk20a/hw_mc_gk20a.h> | 28 | #include <nvgpu/hw/gk20a/hw_mc_gk20a.h> |
29 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | 29 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> |
30 | #include <nvgpu/barrier.h> | ||
30 | 31 | ||
31 | static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) | 32 | static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) |
32 | { | 33 | { |
@@ -654,7 +655,7 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
654 | /* take always the postfence as it is needed for protecting the ce context */ | 655 | /* take always the postfence as it is needed for protecting the ce context */ |
655 | submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; | 656 | submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; |
656 | 657 | ||
657 | wmb(); | 658 | nvgpu_smp_wmb(); |
658 | 659 | ||
659 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, | 660 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, |
660 | 1, submit_flags, &fence, | 661 | 1, submit_flags, &fence, |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 5f81b441..0c1b06e9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <nvgpu/enabled.h> | 35 | #include <nvgpu/enabled.h> |
36 | #include <nvgpu/debug.h> | 36 | #include <nvgpu/debug.h> |
37 | #include <nvgpu/ltc.h> | 37 | #include <nvgpu/ltc.h> |
38 | #include <nvgpu/barrier.h> | ||
38 | 39 | ||
39 | #include "gk20a.h" | 40 | #include "gk20a.h" |
40 | #include "ctxsw_trace_gk20a.h" | 41 | #include "ctxsw_trace_gk20a.h" |
@@ -245,9 +246,9 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
245 | 246 | ||
246 | /* | 247 | /* |
247 | * ensure put is read before any subsequent reads. | 248 | * ensure put is read before any subsequent reads. |
248 | * see corresponding wmb in gk20a_channel_add_job() | 249 | * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job() |
249 | */ | 250 | */ |
250 | rmb(); | 251 | nvgpu_smp_rmb(); |
251 | 252 | ||
252 | while (tmp_get != put) { | 253 | while (tmp_get != put) { |
253 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; | 254 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; |
@@ -618,7 +619,7 @@ unbind: | |||
618 | /* make sure we catch accesses of unopened channels in case | 619 | /* make sure we catch accesses of unopened channels in case |
619 | * there's non-refcounted channel pointers hanging around */ | 620 | * there's non-refcounted channel pointers hanging around */ |
620 | ch->g = NULL; | 621 | ch->g = NULL; |
621 | wmb(); | 622 | nvgpu_smp_wmb(); |
622 | 623 | ||
623 | /* ALWAYS last */ | 624 | /* ALWAYS last */ |
624 | free_channel(f, ch); | 625 | free_channel(f, ch); |
@@ -880,7 +881,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, | |||
880 | * gk20a_free_channel() */ | 881 | * gk20a_free_channel() */ |
881 | ch->referenceable = true; | 882 | ch->referenceable = true; |
882 | nvgpu_atomic_set(&ch->ref_count, 1); | 883 | nvgpu_atomic_set(&ch->ref_count, 1); |
883 | wmb(); | 884 | nvgpu_smp_wmb(); |
884 | 885 | ||
885 | return ch; | 886 | return ch; |
886 | } | 887 | } |
@@ -993,9 +994,9 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
993 | 994 | ||
994 | /* | 995 | /* |
995 | * commit the previous writes before making the entry valid. | 996 | * commit the previous writes before making the entry valid. |
996 | * see the corresponding rmb() in gk20a_free_priv_cmdbuf(). | 997 | * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf(). |
997 | */ | 998 | */ |
998 | wmb(); | 999 | nvgpu_smp_wmb(); |
999 | 1000 | ||
1000 | e->valid = true; | 1001 | e->valid = true; |
1001 | gk20a_dbg_fn("done"); | 1002 | gk20a_dbg_fn("done"); |
@@ -1025,9 +1026,10 @@ static int channel_gk20a_alloc_job(struct channel_gk20a *c, | |||
1025 | 1026 | ||
1026 | /* | 1027 | /* |
1027 | * ensure all subsequent reads happen after reading get. | 1028 | * ensure all subsequent reads happen after reading get. |
1028 | * see corresponding wmb in gk20a_channel_clean_up_jobs() | 1029 | * see corresponding nvgpu_smp_wmb in |
1030 | * gk20a_channel_clean_up_jobs() | ||
1029 | */ | 1031 | */ |
1030 | rmb(); | 1032 | nvgpu_smp_rmb(); |
1031 | 1033 | ||
1032 | if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) | 1034 | if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) |
1033 | *job_out = &c->joblist.pre_alloc.jobs[put]; | 1035 | *job_out = &c->joblist.pre_alloc.jobs[put]; |
@@ -1137,7 +1139,7 @@ bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) | |||
1137 | { | 1139 | { |
1138 | bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; | 1140 | bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; |
1139 | 1141 | ||
1140 | rmb(); | 1142 | nvgpu_smp_rmb(); |
1141 | return pre_alloc_enabled; | 1143 | return pre_alloc_enabled; |
1142 | } | 1144 | } |
1143 | 1145 | ||
@@ -1194,9 +1196,10 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, | |||
1194 | 1196 | ||
1195 | /* | 1197 | /* |
1196 | * commit the previous writes before setting the flag. | 1198 | * commit the previous writes before setting the flag. |
1197 | * see corresponding rmb in channel_gk20a_is_prealloc_enabled() | 1199 | * see corresponding nvgpu_smp_rmb in |
1200 | * channel_gk20a_is_prealloc_enabled() | ||
1198 | */ | 1201 | */ |
1199 | wmb(); | 1202 | nvgpu_smp_wmb(); |
1200 | c->joblist.pre_alloc.enabled = true; | 1203 | c->joblist.pre_alloc.enabled = true; |
1201 | 1204 | ||
1202 | return 0; | 1205 | return 0; |
@@ -1218,9 +1221,10 @@ static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) | |||
1218 | 1221 | ||
1219 | /* | 1222 | /* |
1220 | * commit the previous writes before disabling the flag. | 1223 | * commit the previous writes before disabling the flag. |
1221 | * see corresponding rmb in channel_gk20a_is_prealloc_enabled() | 1224 | * see corresponding nvgpu_smp_rmb in |
1225 | * channel_gk20a_is_prealloc_enabled() | ||
1222 | */ | 1226 | */ |
1223 | wmb(); | 1227 | nvgpu_smp_wmb(); |
1224 | c->joblist.pre_alloc.enabled = false; | 1228 | c->joblist.pre_alloc.enabled = false; |
1225 | } | 1229 | } |
1226 | 1230 | ||
@@ -1741,8 +1745,8 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g) | |||
1741 | /* | 1745 | /* |
1742 | * Currently, the only work type is associated with a lock, which deals | 1746 | * Currently, the only work type is associated with a lock, which deals |
1743 | * with any necessary barriers. If a work type with no locking were | 1747 | * with any necessary barriers. If a work type with no locking were |
1744 | * added, a a wmb() would be needed here. See ..worker_pending() for a | 1748 | * added, a nvgpu_smp_wmb() would be needed here. See |
1745 | * pair. | 1749 | * ..worker_pending() for a pair. |
1746 | */ | 1750 | */ |
1747 | 1751 | ||
1748 | put = nvgpu_atomic_inc_return(&g->channel_worker.put); | 1752 | put = nvgpu_atomic_inc_return(&g->channel_worker.put); |
@@ -1764,8 +1768,9 @@ static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) | |||
1764 | bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; | 1768 | bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; |
1765 | 1769 | ||
1766 | /* | 1770 | /* |
1767 | * This would be the place for a rmb() pairing a wmb() for a wakeup | 1771 | * This would be the place for a nvgpu_smp_rmb() pairing |
1768 | * if we had any work with no implicit barriers caused by locking. | 1772 | * a nvgpu_smp_wmb() for a wakeup if we had any work with |
1773 | * no implicit barriers caused by locking. | ||
1769 | */ | 1774 | */ |
1770 | 1775 | ||
1771 | return pending; | 1776 | return pending; |
@@ -1939,7 +1944,7 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) | |||
1939 | 1944 | ||
1940 | if (e->valid) { | 1945 | if (e->valid) { |
1941 | /* read the entry's valid flag before reading its contents */ | 1946 | /* read the entry's valid flag before reading its contents */ |
1942 | rmb(); | 1947 | nvgpu_smp_rmb(); |
1943 | if ((q->get != e->off) && e->off != 0) | 1948 | if ((q->get != e->off) && e->off != 0) |
1944 | nvgpu_err(g, "requests out-of-order, ch=%d", | 1949 | nvgpu_err(g, "requests out-of-order, ch=%d", |
1945 | c->chid); | 1950 | c->chid); |
@@ -1984,10 +1989,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1984 | 1989 | ||
1985 | /* | 1990 | /* |
1986 | * ensure all pending write complete before adding to the list. | 1991 | * ensure all pending write complete before adding to the list. |
1987 | * see corresponding rmb in gk20a_channel_clean_up_jobs() & | 1992 | * see corresponding nvgpu_smp_rmb in |
1993 | * gk20a_channel_clean_up_jobs() & | ||
1988 | * gk20a_channel_abort_clean_up() | 1994 | * gk20a_channel_abort_clean_up() |
1989 | */ | 1995 | */ |
1990 | wmb(); | 1996 | nvgpu_smp_wmb(); |
1991 | channel_gk20a_joblist_add(c, job); | 1997 | channel_gk20a_joblist_add(c, job); |
1992 | 1998 | ||
1993 | if (!pre_alloc_enabled) | 1999 | if (!pre_alloc_enabled) |
@@ -2061,10 +2067,10 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2061 | 2067 | ||
2062 | /* | 2068 | /* |
2063 | * ensure that all subsequent reads occur after checking | 2069 | * ensure that all subsequent reads occur after checking |
2064 | * that we have a valid node. see corresponding wmb in | 2070 | * that we have a valid node. see corresponding nvgpu_smp_wmb in |
2065 | * gk20a_channel_add_job(). | 2071 | * gk20a_channel_add_job(). |
2066 | */ | 2072 | */ |
2067 | rmb(); | 2073 | nvgpu_smp_rmb(); |
2068 | job = channel_gk20a_joblist_peek(c); | 2074 | job = channel_gk20a_joblist_peek(c); |
2069 | channel_gk20a_joblist_unlock(c); | 2075 | channel_gk20a_joblist_unlock(c); |
2070 | 2076 | ||
@@ -2127,9 +2133,9 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2127 | 2133 | ||
2128 | /* | 2134 | /* |
2129 | * ensure all pending writes complete before freeing up the job. | 2135 | * ensure all pending writes complete before freeing up the job. |
2130 | * see corresponding rmb in channel_gk20a_alloc_job(). | 2136 | * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job(). |
2131 | */ | 2137 | */ |
2132 | wmb(); | 2138 | nvgpu_smp_wmb(); |
2133 | 2139 | ||
2134 | channel_gk20a_free_job(c, job); | 2140 | channel_gk20a_free_job(c, job); |
2135 | job_finished = 1; | 2141 | job_finished = 1; |
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 546917f1..91c3b206 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | |||
@@ -29,6 +29,7 @@ | |||
29 | 29 | ||
30 | #include <nvgpu/log.h> | 30 | #include <nvgpu/log.h> |
31 | #include <nvgpu/atomic.h> | 31 | #include <nvgpu/atomic.h> |
32 | #include <nvgpu/barrier.h> | ||
32 | 33 | ||
33 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | 34 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> |
34 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | 35 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> |
@@ -635,7 +636,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, | |||
635 | dev->ents[write_idx] = *entry; | 636 | dev->ents[write_idx] = *entry; |
636 | 637 | ||
637 | /* ensure record is written before updating write index */ | 638 | /* ensure record is written before updating write index */ |
638 | smp_wmb(); | 639 | nvgpu_smp_wmb(); |
639 | 640 | ||
640 | write_idx++; | 641 | write_idx++; |
641 | if (unlikely(write_idx >= hdr->num_ents)) | 642 | if (unlikely(write_idx >= hdr->num_ents)) |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 5ee90440..fea3b0fa 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <nvgpu/hashtable.h> | 22 | #include <nvgpu/hashtable.h> |
23 | #include <nvgpu/circ_buf.h> | 23 | #include <nvgpu/circ_buf.h> |
24 | #include <nvgpu/thread.h> | 24 | #include <nvgpu/thread.h> |
25 | #include <nvgpu/barrier.h> | ||
25 | 26 | ||
26 | #include "ctxsw_trace_gk20a.h" | 27 | #include "ctxsw_trace_gk20a.h" |
27 | #include "fecs_trace_gk20a.h" | 28 | #include "fecs_trace_gk20a.h" |
@@ -370,7 +371,7 @@ int gk20a_fecs_trace_poll(struct gk20a *g) | |||
370 | } | 371 | } |
371 | 372 | ||
372 | /* ensure FECS records has been updated before incrementing read index */ | 373 | /* ensure FECS records has been updated before incrementing read index */ |
373 | wmb(); | 374 | nvgpu_smp_wmb(); |
374 | gk20a_fecs_trace_set_read_index(g, read); | 375 | gk20a_fecs_trace_set_read_index(g, read); |
375 | 376 | ||
376 | done: | 377 | done: |
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 8ad24c44..5fa9a0df 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <nvgpu/kmem.h> | 21 | #include <nvgpu/kmem.h> |
22 | #include <nvgpu/soc.h> | 22 | #include <nvgpu/soc.h> |
23 | #include <nvgpu/nvhost.h> | 23 | #include <nvgpu/nvhost.h> |
24 | #include <nvgpu/barrier.h> | ||
24 | 25 | ||
25 | #include "gk20a.h" | 26 | #include "gk20a.h" |
26 | #include "channel_gk20a.h" | 27 | #include "channel_gk20a.h" |
@@ -73,7 +74,7 @@ static inline bool gk20a_fence_is_valid(struct gk20a_fence *f) | |||
73 | { | 74 | { |
74 | bool valid = f->valid; | 75 | bool valid = f->valid; |
75 | 76 | ||
76 | rmb(); | 77 | nvgpu_smp_rmb(); |
77 | return valid; | 78 | return valid; |
78 | } | 79 | } |
79 | 80 | ||
@@ -252,7 +253,7 @@ int gk20a_fence_from_semaphore( | |||
252 | f->semaphore_wq = semaphore_wq; | 253 | f->semaphore_wq = semaphore_wq; |
253 | 254 | ||
254 | /* commit previous writes before setting the valid flag */ | 255 | /* commit previous writes before setting the valid flag */ |
255 | wmb(); | 256 | nvgpu_smp_wmb(); |
256 | f->valid = true; | 257 | f->valid = true; |
257 | 258 | ||
258 | return 0; | 259 | return 0; |
@@ -327,7 +328,7 @@ int gk20a_fence_from_syncpt( | |||
327 | f->syncpt_value = value; | 328 | f->syncpt_value = value; |
328 | 329 | ||
329 | /* commit previous writes before setting the valid flag */ | 330 | /* commit previous writes before setting the valid flag */ |
330 | wmb(); | 331 | nvgpu_smp_wmb(); |
331 | f->valid = true; | 332 | f->valid = true; |
332 | 333 | ||
333 | return 0; | 334 | return 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 47e7d82e..fd249bc9 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <nvgpu/log2.h> | 30 | #include <nvgpu/log2.h> |
31 | #include <nvgpu/debug.h> | 31 | #include <nvgpu/debug.h> |
32 | #include <nvgpu/nvhost.h> | 32 | #include <nvgpu/nvhost.h> |
33 | #include <nvgpu/barrier.h> | ||
33 | 34 | ||
34 | #include "gk20a.h" | 35 | #include "gk20a.h" |
35 | #include "ctxsw_trace_gk20a.h" | 36 | #include "ctxsw_trace_gk20a.h" |
@@ -966,7 +967,7 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g) | |||
966 | v = gk20a_bar1_readl(g, bar1_vaddr); | 967 | v = gk20a_bar1_readl(g, bar1_vaddr); |
967 | 968 | ||
968 | *cpu_vaddr = v1; | 969 | *cpu_vaddr = v1; |
969 | smp_mb(); | 970 | nvgpu_smp_mb(); |
970 | 971 | ||
971 | if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { | 972 | if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { |
972 | nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \ | 973 | nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \ |
@@ -1309,7 +1310,7 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g, | |||
1309 | if (refch) { | 1310 | if (refch) { |
1310 | /* mark channel as faulted */ | 1311 | /* mark channel as faulted */ |
1311 | refch->has_timedout = true; | 1312 | refch->has_timedout = true; |
1312 | wmb(); | 1313 | nvgpu_smp_wmb(); |
1313 | /* unblock pending waits */ | 1314 | /* unblock pending waits */ |
1314 | nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq); | 1315 | nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq); |
1315 | nvgpu_cond_broadcast_interruptible(&refch->notifier_wq); | 1316 | nvgpu_cond_broadcast_interruptible(&refch->notifier_wq); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 19ea76cb..ab2d0b7f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -49,6 +49,7 @@ struct nvgpu_cpu_time_correlation_sample; | |||
49 | #include <nvgpu/falcon.h> | 49 | #include <nvgpu/falcon.h> |
50 | #include <nvgpu/pmu.h> | 50 | #include <nvgpu/pmu.h> |
51 | #include <nvgpu/atomic.h> | 51 | #include <nvgpu/atomic.h> |
52 | #include <nvgpu/barrier.h> | ||
52 | 53 | ||
53 | #include "clk_gk20a.h" | 54 | #include "clk_gk20a.h" |
54 | #include "ce2_gk20a.h" | 55 | #include "ce2_gk20a.h" |
@@ -1324,7 +1325,7 @@ static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v) | |||
1324 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | 1325 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); |
1325 | } else { | 1326 | } else { |
1326 | writel_relaxed(v, g->regs + r); | 1327 | writel_relaxed(v, g->regs + r); |
1327 | wmb(); | 1328 | nvgpu_smp_wmb(); |
1328 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v); | 1329 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v); |
1329 | } | 1330 | } |
1330 | } | 1331 | } |
@@ -1351,7 +1352,7 @@ static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v) | |||
1351 | __gk20a_warn_on_no_regs(); | 1352 | __gk20a_warn_on_no_regs(); |
1352 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | 1353 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); |
1353 | } else { | 1354 | } else { |
1354 | wmb(); | 1355 | nvgpu_smp_wmb(); |
1355 | do { | 1356 | do { |
1356 | writel_relaxed(v, g->regs + r); | 1357 | writel_relaxed(v, g->regs + r); |
1357 | } while (readl(g->regs + r) != v); | 1358 | } while (readl(g->regs + r) != v); |
@@ -1365,7 +1366,7 @@ static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v) | |||
1365 | __gk20a_warn_on_no_regs(); | 1366 | __gk20a_warn_on_no_regs(); |
1366 | gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); | 1367 | gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); |
1367 | } else { | 1368 | } else { |
1368 | wmb(); | 1369 | nvgpu_smp_wmb(); |
1369 | writel_relaxed(v, g->bar1 + b); | 1370 | writel_relaxed(v, g->bar1 + b); |
1370 | gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v); | 1371 | gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v); |
1371 | } | 1372 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index cd1d31a5..27442947 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <nvgpu/firmware.h> | 31 | #include <nvgpu/firmware.h> |
32 | #include <nvgpu/enabled.h> | 32 | #include <nvgpu/enabled.h> |
33 | #include <nvgpu/debug.h> | 33 | #include <nvgpu/debug.h> |
34 | #include <nvgpu/barrier.h> | ||
34 | 35 | ||
35 | #include "gk20a.h" | 36 | #include "gk20a.h" |
36 | #include "kind_gk20a.h" | 37 | #include "kind_gk20a.h" |
@@ -554,8 +555,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | |||
554 | gk20a_writel(g, gr_fecs_method_push_r(), | 555 | gk20a_writel(g, gr_fecs_method_push_r(), |
555 | gr_fecs_method_push_adr_f(op.method.addr)); | 556 | gr_fecs_method_push_adr_f(op.method.addr)); |
556 | 557 | ||
557 | /* op.mb.id == 4 cases require waiting for completion on | 558 | /* op.mailbox.id == 4 cases require waiting for completion on |
558 | * for op.mb.id == 0 */ | 559 | * for op.mailbox.id == 0 */ |
559 | if (op.mailbox.id == 4) | 560 | if (op.mailbox.id == 4) |
560 | op.mailbox.id = 0; | 561 | op.mailbox.id = 0; |
561 | 562 | ||