diff options
author | Debarshi Dutta <ddutta@nvidia.com> | 2017-08-18 06:52:29 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-08-22 06:53:51 -0400 |
commit | 81868a187fa3b217368206f17b19309846e8e7fb (patch) | |
tree | 2b59e33b61cc6e206f7781f3b4ab44c5c7b6d721 | |
parent | 5f010177de985c901c33c914efe70a8498a5974f (diff) |
gpu: nvgpu: Nvgpu abstraction for linux barriers.
Construct wrapper nvgpu_* methods to replace
mb, rmb, wmb, smp_mb, smp_rmb, smp_wmb, read_barrier_depends and
smp_read_barrier_depends.
NVGPU-122
Change-Id: I8d24dd70fef5cb0fadaacc15f3ab11531667a0df
Signed-off-by: Debarshi <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541199
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/clk/clk_arb.c | 43 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/bitmap_allocator.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/buddy_allocator.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/lockless_allocator.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/pmu/pmu.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/pmu/pmu_pg.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 54 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/barrier.h | 40 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h | 33 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 7 |
19 files changed, 171 insertions, 76 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index b00ecd31..f1de54c6 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <nvgpu/bug.h> | 27 | #include <nvgpu/bug.h> |
28 | #include <nvgpu/kref.h> | 28 | #include <nvgpu/kref.h> |
29 | #include <nvgpu/log.h> | 29 | #include <nvgpu/log.h> |
30 | #include <nvgpu/barrier.h> | ||
30 | 31 | ||
31 | #include "gk20a/gk20a.h" | 32 | #include "gk20a/gk20a.h" |
32 | #include "clk/clk_arb.h" | 33 | #include "clk/clk_arb.h" |
@@ -386,7 +387,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | |||
386 | goto init_fail; | 387 | goto init_fail; |
387 | do { | 388 | do { |
388 | /* Check that first run is completed */ | 389 | /* Check that first run is completed */ |
389 | smp_mb(); | 390 | nvgpu_smp_mb(); |
390 | wait_event_interruptible(arb->request_wq, | 391 | wait_event_interruptible(arb->request_wq, |
391 | nvgpu_atomic_read(&arb->req_nr)); | 392 | nvgpu_atomic_read(&arb->req_nr)); |
392 | } while (!nvgpu_atomic_read(&arb->req_nr)); | 393 | } while (!nvgpu_atomic_read(&arb->req_nr)); |
@@ -578,7 +579,7 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, | |||
578 | session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8; | 579 | session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8; |
579 | /* make sure that the initialization of the pool is visible | 580 | /* make sure that the initialization of the pool is visible |
580 | * before the update */ | 581 | * before the update */ |
581 | smp_wmb(); | 582 | nvgpu_smp_wmb(); |
582 | session->target = &session->target_pool[0]; | 583 | session->target = &session->target_pool[0]; |
583 | 584 | ||
584 | init_llist_head(&session->targets); | 585 | init_llist_head(&session->targets); |
@@ -706,7 +707,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) | |||
706 | 707 | ||
707 | table = ACCESS_ONCE(arb->current_vf_table); | 708 | table = ACCESS_ONCE(arb->current_vf_table); |
708 | /* make flag visible when all data has resolved in the tables */ | 709 | /* make flag visible when all data has resolved in the tables */ |
709 | smp_rmb(); | 710 | nvgpu_smp_rmb(); |
710 | 711 | ||
711 | table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] : | 712 | table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] : |
712 | &arb->vf_table_pool[0]; | 713 | &arb->vf_table_pool[0]; |
@@ -980,7 +981,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) | |||
980 | } | 981 | } |
981 | 982 | ||
982 | /* make table visible when all data has resolved in the tables */ | 983 | /* make table visible when all data has resolved in the tables */ |
983 | smp_wmb(); | 984 | nvgpu_smp_wmb(); |
984 | xchg(&arb->current_vf_table, table); | 985 | xchg(&arb->current_vf_table, table); |
985 | 986 | ||
986 | exit_vf_table: | 987 | exit_vf_table: |
@@ -1077,7 +1078,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1077 | &session->target_pool[1] : | 1078 | &session->target_pool[1] : |
1078 | &session->target_pool[0]; | 1079 | &session->target_pool[0]; |
1079 | /* Do not reorder pointer */ | 1080 | /* Do not reorder pointer */ |
1080 | smp_rmb(); | 1081 | nvgpu_smp_rmb(); |
1081 | head = llist_del_all(&session->targets); | 1082 | head = llist_del_all(&session->targets); |
1082 | if (head) { | 1083 | if (head) { |
1083 | 1084 | ||
@@ -1102,7 +1103,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1102 | llist_add(&dev->node, &arb->requests); | 1103 | llist_add(&dev->node, &arb->requests); |
1103 | } | 1104 | } |
1104 | /* Ensure target is updated before ptr sawp */ | 1105 | /* Ensure target is updated before ptr sawp */ |
1105 | smp_wmb(); | 1106 | nvgpu_smp_wmb(); |
1106 | xchg(&session->target, target); | 1107 | xchg(&session->target, target); |
1107 | } | 1108 | } |
1108 | 1109 | ||
@@ -1148,7 +1149,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1148 | if (pstate == VF_POINT_INVALID_PSTATE) { | 1149 | if (pstate == VF_POINT_INVALID_PSTATE) { |
1149 | arb->status = -EINVAL; | 1150 | arb->status = -EINVAL; |
1150 | /* make status visible */ | 1151 | /* make status visible */ |
1151 | smp_mb(); | 1152 | nvgpu_smp_mb(); |
1152 | goto exit_arb; | 1153 | goto exit_arb; |
1153 | } | 1154 | } |
1154 | 1155 | ||
@@ -1175,7 +1176,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1175 | nvgpu_mutex_release(&arb->pstate_lock); | 1176 | nvgpu_mutex_release(&arb->pstate_lock); |
1176 | 1177 | ||
1177 | /* make status visible */ | 1178 | /* make status visible */ |
1178 | smp_mb(); | 1179 | nvgpu_smp_mb(); |
1179 | goto exit_arb; | 1180 | goto exit_arb; |
1180 | } | 1181 | } |
1181 | status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); | 1182 | status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); |
@@ -1184,7 +1185,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1184 | nvgpu_mutex_release(&arb->pstate_lock); | 1185 | nvgpu_mutex_release(&arb->pstate_lock); |
1185 | 1186 | ||
1186 | /* make status visible */ | 1187 | /* make status visible */ |
1187 | smp_mb(); | 1188 | nvgpu_smp_mb(); |
1188 | goto exit_arb; | 1189 | goto exit_arb; |
1189 | } | 1190 | } |
1190 | 1191 | ||
@@ -1196,7 +1197,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1196 | nvgpu_mutex_release(&arb->pstate_lock); | 1197 | nvgpu_mutex_release(&arb->pstate_lock); |
1197 | 1198 | ||
1198 | /* make status visible */ | 1199 | /* make status visible */ |
1199 | smp_mb(); | 1200 | nvgpu_smp_mb(); |
1200 | goto exit_arb; | 1201 | goto exit_arb; |
1201 | } | 1202 | } |
1202 | 1203 | ||
@@ -1206,7 +1207,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1206 | nvgpu_mutex_release(&arb->pstate_lock); | 1207 | nvgpu_mutex_release(&arb->pstate_lock); |
1207 | 1208 | ||
1208 | /* make status visible */ | 1209 | /* make status visible */ |
1209 | smp_mb(); | 1210 | nvgpu_smp_mb(); |
1210 | goto exit_arb; | 1211 | goto exit_arb; |
1211 | } | 1212 | } |
1212 | 1213 | ||
@@ -1216,7 +1217,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1216 | nvgpu_mutex_release(&arb->pstate_lock); | 1217 | nvgpu_mutex_release(&arb->pstate_lock); |
1217 | 1218 | ||
1218 | /* make status visible */ | 1219 | /* make status visible */ |
1219 | smp_mb(); | 1220 | nvgpu_smp_mb(); |
1220 | goto exit_arb; | 1221 | goto exit_arb; |
1221 | } | 1222 | } |
1222 | 1223 | ||
@@ -1224,7 +1225,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1224 | &arb->actual_pool[1] : &arb->actual_pool[0]; | 1225 | &arb->actual_pool[1] : &arb->actual_pool[0]; |
1225 | 1226 | ||
1226 | /* do not reorder this pointer */ | 1227 | /* do not reorder this pointer */ |
1227 | smp_rmb(); | 1228 | nvgpu_smp_rmb(); |
1228 | actual->gpc2clk = gpc2clk_target; | 1229 | actual->gpc2clk = gpc2clk_target; |
1229 | actual->mclk = mclk_target; | 1230 | actual->mclk = mclk_target; |
1230 | arb->voltuv_actual = voltuv; | 1231 | arb->voltuv_actual = voltuv; |
@@ -1232,7 +1233,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1232 | arb->status = status; | 1233 | arb->status = status; |
1233 | 1234 | ||
1234 | /* Make changes visible to other threads */ | 1235 | /* Make changes visible to other threads */ |
1235 | smp_wmb(); | 1236 | nvgpu_smp_wmb(); |
1236 | xchg(&arb->actual, actual); | 1237 | xchg(&arb->actual, actual); |
1237 | 1238 | ||
1238 | status = nvgpu_lpwr_enable_pg(g, false); | 1239 | status = nvgpu_lpwr_enable_pg(g, false); |
@@ -1241,12 +1242,12 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1241 | nvgpu_mutex_release(&arb->pstate_lock); | 1242 | nvgpu_mutex_release(&arb->pstate_lock); |
1242 | 1243 | ||
1243 | /* make status visible */ | 1244 | /* make status visible */ |
1244 | smp_mb(); | 1245 | nvgpu_smp_mb(); |
1245 | goto exit_arb; | 1246 | goto exit_arb; |
1246 | } | 1247 | } |
1247 | 1248 | ||
1248 | /* status must be visible before atomic inc */ | 1249 | /* status must be visible before atomic inc */ |
1249 | smp_wmb(); | 1250 | nvgpu_smp_wmb(); |
1250 | nvgpu_atomic_inc(&arb->req_nr); | 1251 | nvgpu_atomic_inc(&arb->req_nr); |
1251 | 1252 | ||
1252 | /* Unlock pstate change for PG */ | 1253 | /* Unlock pstate change for PG */ |
@@ -1287,7 +1288,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
1287 | (curr - debug->switch_avg) * (curr - prev_avg); | 1288 | (curr - debug->switch_avg) * (curr - prev_avg); |
1288 | } | 1289 | } |
1289 | /* commit changes before exchanging debug pointer */ | 1290 | /* commit changes before exchanging debug pointer */ |
1290 | smp_wmb(); | 1291 | nvgpu_smp_wmb(); |
1291 | xchg(&arb->debug, debug); | 1292 | xchg(&arb->debug, debug); |
1292 | #endif | 1293 | #endif |
1293 | 1294 | ||
@@ -1687,7 +1688,7 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, | |||
1687 | do { | 1688 | do { |
1688 | target = ACCESS_ONCE(session->target); | 1689 | target = ACCESS_ONCE(session->target); |
1689 | /* no reordering of this pointer */ | 1690 | /* no reordering of this pointer */ |
1690 | smp_rmb(); | 1691 | nvgpu_smp_rmb(); |
1691 | 1692 | ||
1692 | switch (api_domain) { | 1693 | switch (api_domain) { |
1693 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 1694 | case NVGPU_GPU_CLK_DOMAIN_MCLK: |
@@ -1716,7 +1717,7 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, | |||
1716 | do { | 1717 | do { |
1717 | actual = ACCESS_ONCE(arb->actual); | 1718 | actual = ACCESS_ONCE(arb->actual); |
1718 | /* no reordering of this pointer */ | 1719 | /* no reordering of this pointer */ |
1719 | smp_rmb(); | 1720 | nvgpu_smp_rmb(); |
1720 | 1721 | ||
1721 | switch (api_domain) { | 1722 | switch (api_domain) { |
1722 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 1723 | case NVGPU_GPU_CLK_DOMAIN_MCLK: |
@@ -1854,7 +1855,7 @@ static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, | |||
1854 | 1855 | ||
1855 | table = ACCESS_ONCE(arb->current_vf_table); | 1856 | table = ACCESS_ONCE(arb->current_vf_table); |
1856 | /* pointer to table can be updated by callback */ | 1857 | /* pointer to table can be updated by callback */ |
1857 | smp_rmb(); | 1858 | nvgpu_smp_rmb(); |
1858 | 1859 | ||
1859 | if (!table) | 1860 | if (!table) |
1860 | continue; | 1861 | continue; |
@@ -2039,7 +2040,7 @@ static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) | |||
2039 | 2040 | ||
2040 | debug = ACCESS_ONCE(arb->debug); | 2041 | debug = ACCESS_ONCE(arb->debug); |
2041 | /* Make copy of structure and ensure no reordering */ | 2042 | /* Make copy of structure and ensure no reordering */ |
2042 | smp_rmb(); | 2043 | nvgpu_smp_rmb(); |
2043 | if (!debug) | 2044 | if (!debug) |
2044 | return -EINVAL; | 2045 | return -EINVAL; |
2045 | 2046 | ||
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c index eae0475a..274e9c93 100644 --- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <nvgpu/allocator.h> | 18 | #include <nvgpu/allocator.h> |
19 | #include <nvgpu/kmem.h> | 19 | #include <nvgpu/kmem.h> |
20 | #include <nvgpu/bug.h> | 20 | #include <nvgpu/bug.h> |
21 | #include <nvgpu/barrier.h> | ||
21 | 22 | ||
22 | #include "bitmap_allocator_priv.h" | 23 | #include "bitmap_allocator_priv.h" |
23 | 24 | ||
@@ -40,7 +41,7 @@ static int nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a) | |||
40 | struct nvgpu_bitmap_allocator *ba = a->priv; | 41 | struct nvgpu_bitmap_allocator *ba = a->priv; |
41 | int inited = ba->inited; | 42 | int inited = ba->inited; |
42 | 43 | ||
43 | rmb(); | 44 | nvgpu_smp_rmb(); |
44 | return inited; | 45 | return inited; |
45 | } | 46 | } |
46 | 47 | ||
@@ -408,7 +409,7 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
408 | goto fail; | 409 | goto fail; |
409 | } | 410 | } |
410 | 411 | ||
411 | wmb(); | 412 | nvgpu_smp_wmb(); |
412 | a->inited = true; | 413 | a->inited = true; |
413 | 414 | ||
414 | #ifdef CONFIG_DEBUG_FS | 415 | #ifdef CONFIG_DEBUG_FS |
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index 0ef94c10..3e305bb8 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <nvgpu/kmem.h> | 18 | #include <nvgpu/kmem.h> |
19 | #include <nvgpu/bug.h> | 19 | #include <nvgpu/bug.h> |
20 | #include <nvgpu/log2.h> | 20 | #include <nvgpu/log2.h> |
21 | #include <nvgpu/barrier.h> | ||
21 | 22 | ||
22 | #include "gk20a/mm_gk20a.h" | 23 | #include "gk20a/mm_gk20a.h" |
23 | #include "gk20a/platform_gk20a.h" | 24 | #include "gk20a/platform_gk20a.h" |
@@ -1064,7 +1065,7 @@ static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a) | |||
1064 | struct nvgpu_buddy_allocator *ba = a->priv; | 1065 | struct nvgpu_buddy_allocator *ba = a->priv; |
1065 | int inited = ba->initialized; | 1066 | int inited = ba->initialized; |
1066 | 1067 | ||
1067 | rmb(); | 1068 | nvgpu_smp_rmb(); |
1068 | return inited; | 1069 | return inited; |
1069 | } | 1070 | } |
1070 | 1071 | ||
@@ -1289,7 +1290,7 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
1289 | if (err) | 1290 | if (err) |
1290 | goto fail; | 1291 | goto fail; |
1291 | 1292 | ||
1292 | wmb(); | 1293 | nvgpu_smp_wmb(); |
1293 | a->initialized = 1; | 1294 | a->initialized = 1; |
1294 | 1295 | ||
1295 | #ifdef CONFIG_DEBUG_FS | 1296 | #ifdef CONFIG_DEBUG_FS |
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 73dff2c3..7f486d68 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <nvgpu/nvgpu_mem.h> | 21 | #include <nvgpu/nvgpu_mem.h> |
22 | #include <nvgpu/enabled.h> | 22 | #include <nvgpu/enabled.h> |
23 | #include <nvgpu/page_allocator.h> | 23 | #include <nvgpu/page_allocator.h> |
24 | #include <nvgpu/barrier.h> | ||
24 | 25 | ||
25 | #include "gk20a/gk20a.h" | 26 | #include "gk20a/gk20a.h" |
26 | #include "gk20a/mm_gk20a.h" | 27 | #include "gk20a/mm_gk20a.h" |
@@ -164,8 +165,8 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm) | |||
164 | return err; | 165 | return err; |
165 | 166 | ||
166 | /* | 167 | /* |
167 | * One mb() is done after all mapping operations. Don't need individual | 168 | * One nvgpu_smp_mb() is done after all mapping operations. Don't need |
168 | * barriers for each PD write. | 169 | * individual barriers for each PD write. |
169 | */ | 170 | */ |
170 | vm->pdb.mem->skip_wmb = true; | 171 | vm->pdb.mem->skip_wmb = true; |
171 | 172 | ||
@@ -259,8 +260,8 @@ static int pd_allocate(struct vm_gk20a *vm, | |||
259 | } | 260 | } |
260 | 261 | ||
261 | /* | 262 | /* |
262 | * One mb() is done after all mapping operations. Don't need individual | 263 | * One nvgpu_smp_mb() is done after all mapping operations. Don't need |
263 | * barriers for each PD write. | 264 | * individual barriers for each PD write. |
264 | */ | 265 | */ |
265 | pd->mem->skip_wmb = true; | 266 | pd->mem->skip_wmb = true; |
266 | 267 | ||
@@ -714,7 +715,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
714 | attrs); | 715 | attrs); |
715 | 716 | ||
716 | unmap_gmmu_pages(g, &vm->pdb); | 717 | unmap_gmmu_pages(g, &vm->pdb); |
717 | mb(); | 718 | nvgpu_smp_mb(); |
718 | 719 | ||
719 | __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); | 720 | __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); |
720 | 721 | ||
@@ -983,7 +984,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) | |||
983 | * There probably also needs to be a TLB invalidate as well but we leave | 984 | * There probably also needs to be a TLB invalidate as well but we leave |
984 | * that to the caller of this function. | 985 | * that to the caller of this function. |
985 | */ | 986 | */ |
986 | wmb(); | 987 | nvgpu_smp_wmb(); |
987 | 988 | ||
988 | return 0; | 989 | return 0; |
989 | } | 990 | } |
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c index eeb86095..8f712a14 100644 --- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <nvgpu/atomic.h> | 17 | #include <nvgpu/atomic.h> |
18 | #include <nvgpu/allocator.h> | 18 | #include <nvgpu/allocator.h> |
19 | #include <nvgpu/kmem.h> | 19 | #include <nvgpu/kmem.h> |
20 | #include <nvgpu/barrier.h> | ||
20 | 21 | ||
21 | #include "lockless_allocator_priv.h" | 22 | #include "lockless_allocator_priv.h" |
22 | 23 | ||
@@ -39,7 +40,7 @@ static int nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a) | |||
39 | struct nvgpu_lockless_allocator *pa = a->priv; | 40 | struct nvgpu_lockless_allocator *pa = a->priv; |
40 | int inited = pa->inited; | 41 | int inited = pa->inited; |
41 | 42 | ||
42 | rmb(); | 43 | nvgpu_smp_rmb(); |
43 | return inited; | 44 | return inited; |
44 | } | 45 | } |
45 | 46 | ||
@@ -198,7 +199,7 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
198 | a->flags = flags; | 199 | a->flags = flags; |
199 | nvgpu_atomic_set(&a->nr_allocs, 0); | 200 | nvgpu_atomic_set(&a->nr_allocs, 0); |
200 | 201 | ||
201 | wmb(); | 202 | nvgpu_smp_wmb(); |
202 | a->inited = true; | 203 | a->inited = true; |
203 | 204 | ||
204 | #ifdef CONFIG_DEBUG_FS | 205 | #ifdef CONFIG_DEBUG_FS |
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c index 58108722..63597d10 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <nvgpu/log.h> | 16 | #include <nvgpu/log.h> |
17 | #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h> | 17 | #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h> |
18 | #include <nvgpu/enabled.h> | 18 | #include <nvgpu/enabled.h> |
19 | #include <nvgpu/barrier.h> | ||
19 | 20 | ||
20 | #include "gk20a/gk20a.h" | 21 | #include "gk20a/gk20a.h" |
21 | 22 | ||
@@ -394,7 +395,7 @@ void nvgpu_pmu_state_change(struct gk20a *g, u32 pmu_state, | |||
394 | } | 395 | } |
395 | 396 | ||
396 | /* make status visible */ | 397 | /* make status visible */ |
397 | smp_mb(); | 398 | nvgpu_smp_mb(); |
398 | } | 399 | } |
399 | 400 | ||
400 | static int nvgpu_pg_init_task(void *arg) | 401 | static int nvgpu_pg_init_task(void *arg) |
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c index 935ae95a..b435f4a7 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <nvgpu/pmu.h> | 14 | #include <nvgpu/pmu.h> |
15 | #include <nvgpu/log.h> | 15 | #include <nvgpu/log.h> |
16 | #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h> | 16 | #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h> |
17 | #include <nvgpu/barrier.h> | ||
17 | 18 | ||
18 | #include "gk20a/gk20a.h" | 19 | #include "gk20a/gk20a.h" |
19 | 20 | ||
@@ -84,7 +85,7 @@ static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, | |||
84 | true); | 85 | true); |
85 | WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); | 86 | WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); |
86 | /* make status visible */ | 87 | /* make status visible */ |
87 | smp_mb(); | 88 | nvgpu_smp_mb(); |
88 | } else | 89 | } else |
89 | nvgpu_pmu_state_change(g, PMU_STATE_ELPG_BOOTED, | 90 | nvgpu_pmu_state_change(g, PMU_STATE_ELPG_BOOTED, |
90 | true); | 91 | true); |
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index f50fec13..3e979ebd 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <nvgpu/hw/gk20a/hw_top_gk20a.h> | 27 | #include <nvgpu/hw/gk20a/hw_top_gk20a.h> |
28 | #include <nvgpu/hw/gk20a/hw_mc_gk20a.h> | 28 | #include <nvgpu/hw/gk20a/hw_mc_gk20a.h> |
29 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | 29 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> |
30 | #include <nvgpu/barrier.h> | ||
30 | 31 | ||
31 | static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) | 32 | static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) |
32 | { | 33 | { |
@@ -654,7 +655,7 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
654 | /* take always the postfence as it is needed for protecting the ce context */ | 655 | /* take always the postfence as it is needed for protecting the ce context */ |
655 | submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; | 656 | submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; |
656 | 657 | ||
657 | wmb(); | 658 | nvgpu_smp_wmb(); |
658 | 659 | ||
659 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, | 660 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, |
660 | 1, submit_flags, &fence, | 661 | 1, submit_flags, &fence, |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 5f81b441..0c1b06e9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <nvgpu/enabled.h> | 35 | #include <nvgpu/enabled.h> |
36 | #include <nvgpu/debug.h> | 36 | #include <nvgpu/debug.h> |
37 | #include <nvgpu/ltc.h> | 37 | #include <nvgpu/ltc.h> |
38 | #include <nvgpu/barrier.h> | ||
38 | 39 | ||
39 | #include "gk20a.h" | 40 | #include "gk20a.h" |
40 | #include "ctxsw_trace_gk20a.h" | 41 | #include "ctxsw_trace_gk20a.h" |
@@ -245,9 +246,9 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
245 | 246 | ||
246 | /* | 247 | /* |
247 | * ensure put is read before any subsequent reads. | 248 | * ensure put is read before any subsequent reads. |
248 | * see corresponding wmb in gk20a_channel_add_job() | 249 | * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job() |
249 | */ | 250 | */ |
250 | rmb(); | 251 | nvgpu_smp_rmb(); |
251 | 252 | ||
252 | while (tmp_get != put) { | 253 | while (tmp_get != put) { |
253 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; | 254 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; |
@@ -618,7 +619,7 @@ unbind: | |||
618 | /* make sure we catch accesses of unopened channels in case | 619 | /* make sure we catch accesses of unopened channels in case |
619 | * there's non-refcounted channel pointers hanging around */ | 620 | * there's non-refcounted channel pointers hanging around */ |
620 | ch->g = NULL; | 621 | ch->g = NULL; |
621 | wmb(); | 622 | nvgpu_smp_wmb(); |
622 | 623 | ||
623 | /* ALWAYS last */ | 624 | /* ALWAYS last */ |
624 | free_channel(f, ch); | 625 | free_channel(f, ch); |
@@ -880,7 +881,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, | |||
880 | * gk20a_free_channel() */ | 881 | * gk20a_free_channel() */ |
881 | ch->referenceable = true; | 882 | ch->referenceable = true; |
882 | nvgpu_atomic_set(&ch->ref_count, 1); | 883 | nvgpu_atomic_set(&ch->ref_count, 1); |
883 | wmb(); | 884 | nvgpu_smp_wmb(); |
884 | 885 | ||
885 | return ch; | 886 | return ch; |
886 | } | 887 | } |
@@ -993,9 +994,9 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
993 | 994 | ||
994 | /* | 995 | /* |
995 | * commit the previous writes before making the entry valid. | 996 | * commit the previous writes before making the entry valid. |
996 | * see the corresponding rmb() in gk20a_free_priv_cmdbuf(). | 997 | * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf(). |
997 | */ | 998 | */ |
998 | wmb(); | 999 | nvgpu_smp_wmb(); |
999 | 1000 | ||
1000 | e->valid = true; | 1001 | e->valid = true; |
1001 | gk20a_dbg_fn("done"); | 1002 | gk20a_dbg_fn("done"); |
@@ -1025,9 +1026,10 @@ static int channel_gk20a_alloc_job(struct channel_gk20a *c, | |||
1025 | 1026 | ||
1026 | /* | 1027 | /* |
1027 | * ensure all subsequent reads happen after reading get. | 1028 | * ensure all subsequent reads happen after reading get. |
1028 | * see corresponding wmb in gk20a_channel_clean_up_jobs() | 1029 | * see corresponding nvgpu_smp_wmb in |
1030 | * gk20a_channel_clean_up_jobs() | ||
1029 | */ | 1031 | */ |
1030 | rmb(); | 1032 | nvgpu_smp_rmb(); |
1031 | 1033 | ||
1032 | if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) | 1034 | if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) |
1033 | *job_out = &c->joblist.pre_alloc.jobs[put]; | 1035 | *job_out = &c->joblist.pre_alloc.jobs[put]; |
@@ -1137,7 +1139,7 @@ bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) | |||
1137 | { | 1139 | { |
1138 | bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; | 1140 | bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; |
1139 | 1141 | ||
1140 | rmb(); | 1142 | nvgpu_smp_rmb(); |
1141 | return pre_alloc_enabled; | 1143 | return pre_alloc_enabled; |
1142 | } | 1144 | } |
1143 | 1145 | ||
@@ -1194,9 +1196,10 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, | |||
1194 | 1196 | ||
1195 | /* | 1197 | /* |
1196 | * commit the previous writes before setting the flag. | 1198 | * commit the previous writes before setting the flag. |
1197 | * see corresponding rmb in channel_gk20a_is_prealloc_enabled() | 1199 | * see corresponding nvgpu_smp_rmb in |
1200 | * channel_gk20a_is_prealloc_enabled() | ||
1198 | */ | 1201 | */ |
1199 | wmb(); | 1202 | nvgpu_smp_wmb(); |
1200 | c->joblist.pre_alloc.enabled = true; | 1203 | c->joblist.pre_alloc.enabled = true; |
1201 | 1204 | ||
1202 | return 0; | 1205 | return 0; |
@@ -1218,9 +1221,10 @@ static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) | |||
1218 | 1221 | ||
1219 | /* | 1222 | /* |
1220 | * commit the previous writes before disabling the flag. | 1223 | * commit the previous writes before disabling the flag. |
1221 | * see corresponding rmb in channel_gk20a_is_prealloc_enabled() | 1224 | * see corresponding nvgpu_smp_rmb in |
1225 | * channel_gk20a_is_prealloc_enabled() | ||
1222 | */ | 1226 | */ |
1223 | wmb(); | 1227 | nvgpu_smp_wmb(); |
1224 | c->joblist.pre_alloc.enabled = false; | 1228 | c->joblist.pre_alloc.enabled = false; |
1225 | } | 1229 | } |
1226 | 1230 | ||
@@ -1741,8 +1745,8 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g) | |||
1741 | /* | 1745 | /* |
1742 | * Currently, the only work type is associated with a lock, which deals | 1746 | * Currently, the only work type is associated with a lock, which deals |
1743 | * with any necessary barriers. If a work type with no locking were | 1747 | * with any necessary barriers. If a work type with no locking were |
1744 | * added, a a wmb() would be needed here. See ..worker_pending() for a | 1748 | * added, a nvgpu_smp_wmb() would be needed here. See |
1745 | * pair. | 1749 | * ..worker_pending() for a pair. |
1746 | */ | 1750 | */ |
1747 | 1751 | ||
1748 | put = nvgpu_atomic_inc_return(&g->channel_worker.put); | 1752 | put = nvgpu_atomic_inc_return(&g->channel_worker.put); |
@@ -1764,8 +1768,9 @@ static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) | |||
1764 | bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; | 1768 | bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; |
1765 | 1769 | ||
1766 | /* | 1770 | /* |
1767 | * This would be the place for a rmb() pairing a wmb() for a wakeup | 1771 | * This would be the place for a nvgpu_smp_rmb() pairing |
1768 | * if we had any work with no implicit barriers caused by locking. | 1772 | * a nvgpu_smp_wmb() for a wakeup if we had any work with |
1773 | * no implicit barriers caused by locking. | ||
1769 | */ | 1774 | */ |
1770 | 1775 | ||
1771 | return pending; | 1776 | return pending; |
@@ -1939,7 +1944,7 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) | |||
1939 | 1944 | ||
1940 | if (e->valid) { | 1945 | if (e->valid) { |
1941 | /* read the entry's valid flag before reading its contents */ | 1946 | /* read the entry's valid flag before reading its contents */ |
1942 | rmb(); | 1947 | nvgpu_smp_rmb(); |
1943 | if ((q->get != e->off) && e->off != 0) | 1948 | if ((q->get != e->off) && e->off != 0) |
1944 | nvgpu_err(g, "requests out-of-order, ch=%d", | 1949 | nvgpu_err(g, "requests out-of-order, ch=%d", |
1945 | c->chid); | 1950 | c->chid); |
@@ -1984,10 +1989,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1984 | 1989 | ||
1985 | /* | 1990 | /* |
1986 | * ensure all pending write complete before adding to the list. | 1991 | * ensure all pending write complete before adding to the list. |
1987 | * see corresponding rmb in gk20a_channel_clean_up_jobs() & | 1992 | * see corresponding nvgpu_smp_rmb in |
1993 | * gk20a_channel_clean_up_jobs() & | ||
1988 | * gk20a_channel_abort_clean_up() | 1994 | * gk20a_channel_abort_clean_up() |
1989 | */ | 1995 | */ |
1990 | wmb(); | 1996 | nvgpu_smp_wmb(); |
1991 | channel_gk20a_joblist_add(c, job); | 1997 | channel_gk20a_joblist_add(c, job); |
1992 | 1998 | ||
1993 | if (!pre_alloc_enabled) | 1999 | if (!pre_alloc_enabled) |
@@ -2061,10 +2067,10 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2061 | 2067 | ||
2062 | /* | 2068 | /* |
2063 | * ensure that all subsequent reads occur after checking | 2069 | * ensure that all subsequent reads occur after checking |
2064 | * that we have a valid node. see corresponding wmb in | 2070 | * that we have a valid node. see corresponding nvgpu_smp_wmb in |
2065 | * gk20a_channel_add_job(). | 2071 | * gk20a_channel_add_job(). |
2066 | */ | 2072 | */ |
2067 | rmb(); | 2073 | nvgpu_smp_rmb(); |
2068 | job = channel_gk20a_joblist_peek(c); | 2074 | job = channel_gk20a_joblist_peek(c); |
2069 | channel_gk20a_joblist_unlock(c); | 2075 | channel_gk20a_joblist_unlock(c); |
2070 | 2076 | ||
@@ -2127,9 +2133,9 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
2127 | 2133 | ||
2128 | /* | 2134 | /* |
2129 | * ensure all pending writes complete before freeing up the job. | 2135 | * ensure all pending writes complete before freeing up the job. |
2130 | * see corresponding rmb in channel_gk20a_alloc_job(). | 2136 | * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job(). |
2131 | */ | 2137 | */ |
2132 | wmb(); | 2138 | nvgpu_smp_wmb(); |
2133 | 2139 | ||
2134 | channel_gk20a_free_job(c, job); | 2140 | channel_gk20a_free_job(c, job); |
2135 | job_finished = 1; | 2141 | job_finished = 1; |
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 546917f1..91c3b206 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | |||
@@ -29,6 +29,7 @@ | |||
29 | 29 | ||
30 | #include <nvgpu/log.h> | 30 | #include <nvgpu/log.h> |
31 | #include <nvgpu/atomic.h> | 31 | #include <nvgpu/atomic.h> |
32 | #include <nvgpu/barrier.h> | ||
32 | 33 | ||
33 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | 34 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> |
34 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | 35 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> |
@@ -635,7 +636,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, | |||
635 | dev->ents[write_idx] = *entry; | 636 | dev->ents[write_idx] = *entry; |
636 | 637 | ||
637 | /* ensure record is written before updating write index */ | 638 | /* ensure record is written before updating write index */ |
638 | smp_wmb(); | 639 | nvgpu_smp_wmb(); |
639 | 640 | ||
640 | write_idx++; | 641 | write_idx++; |
641 | if (unlikely(write_idx >= hdr->num_ents)) | 642 | if (unlikely(write_idx >= hdr->num_ents)) |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 5ee90440..fea3b0fa 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <nvgpu/hashtable.h> | 22 | #include <nvgpu/hashtable.h> |
23 | #include <nvgpu/circ_buf.h> | 23 | #include <nvgpu/circ_buf.h> |
24 | #include <nvgpu/thread.h> | 24 | #include <nvgpu/thread.h> |
25 | #include <nvgpu/barrier.h> | ||
25 | 26 | ||
26 | #include "ctxsw_trace_gk20a.h" | 27 | #include "ctxsw_trace_gk20a.h" |
27 | #include "fecs_trace_gk20a.h" | 28 | #include "fecs_trace_gk20a.h" |
@@ -370,7 +371,7 @@ int gk20a_fecs_trace_poll(struct gk20a *g) | |||
370 | } | 371 | } |
371 | 372 | ||
372 | /* ensure FECS records have been updated before incrementing read index */ | 373 | /* ensure FECS records have been updated before incrementing read index */ |
373 | wmb(); | 374 | nvgpu_smp_wmb(); |
374 | gk20a_fecs_trace_set_read_index(g, read); | 375 | gk20a_fecs_trace_set_read_index(g, read); |
375 | 376 | ||
376 | done: | 377 | done: |
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 8ad24c44..5fa9a0df 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <nvgpu/kmem.h> | 21 | #include <nvgpu/kmem.h> |
22 | #include <nvgpu/soc.h> | 22 | #include <nvgpu/soc.h> |
23 | #include <nvgpu/nvhost.h> | 23 | #include <nvgpu/nvhost.h> |
24 | #include <nvgpu/barrier.h> | ||
24 | 25 | ||
25 | #include "gk20a.h" | 26 | #include "gk20a.h" |
26 | #include "channel_gk20a.h" | 27 | #include "channel_gk20a.h" |
@@ -73,7 +74,7 @@ static inline bool gk20a_fence_is_valid(struct gk20a_fence *f) | |||
73 | { | 74 | { |
74 | bool valid = f->valid; | 75 | bool valid = f->valid; |
75 | 76 | ||
76 | rmb(); | 77 | nvgpu_smp_rmb(); |
77 | return valid; | 78 | return valid; |
78 | } | 79 | } |
79 | 80 | ||
@@ -252,7 +253,7 @@ int gk20a_fence_from_semaphore( | |||
252 | f->semaphore_wq = semaphore_wq; | 253 | f->semaphore_wq = semaphore_wq; |
253 | 254 | ||
254 | /* commit previous writes before setting the valid flag */ | 255 | /* commit previous writes before setting the valid flag */ |
255 | wmb(); | 256 | nvgpu_smp_wmb(); |
256 | f->valid = true; | 257 | f->valid = true; |
257 | 258 | ||
258 | return 0; | 259 | return 0; |
@@ -327,7 +328,7 @@ int gk20a_fence_from_syncpt( | |||
327 | f->syncpt_value = value; | 328 | f->syncpt_value = value; |
328 | 329 | ||
329 | /* commit previous writes before setting the valid flag */ | 330 | /* commit previous writes before setting the valid flag */ |
330 | wmb(); | 331 | nvgpu_smp_wmb(); |
331 | f->valid = true; | 332 | f->valid = true; |
332 | 333 | ||
333 | return 0; | 334 | return 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 47e7d82e..fd249bc9 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <nvgpu/log2.h> | 30 | #include <nvgpu/log2.h> |
31 | #include <nvgpu/debug.h> | 31 | #include <nvgpu/debug.h> |
32 | #include <nvgpu/nvhost.h> | 32 | #include <nvgpu/nvhost.h> |
33 | #include <nvgpu/barrier.h> | ||
33 | 34 | ||
34 | #include "gk20a.h" | 35 | #include "gk20a.h" |
35 | #include "ctxsw_trace_gk20a.h" | 36 | #include "ctxsw_trace_gk20a.h" |
@@ -966,7 +967,7 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g) | |||
966 | v = gk20a_bar1_readl(g, bar1_vaddr); | 967 | v = gk20a_bar1_readl(g, bar1_vaddr); |
967 | 968 | ||
968 | *cpu_vaddr = v1; | 969 | *cpu_vaddr = v1; |
969 | smp_mb(); | 970 | nvgpu_smp_mb(); |
970 | 971 | ||
971 | if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { | 972 | if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { |
972 | nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \ | 973 | nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \ |
@@ -1309,7 +1310,7 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g, | |||
1309 | if (refch) { | 1310 | if (refch) { |
1310 | /* mark channel as faulted */ | 1311 | /* mark channel as faulted */ |
1311 | refch->has_timedout = true; | 1312 | refch->has_timedout = true; |
1312 | wmb(); | 1313 | nvgpu_smp_wmb(); |
1313 | /* unblock pending waits */ | 1314 | /* unblock pending waits */ |
1314 | nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq); | 1315 | nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq); |
1315 | nvgpu_cond_broadcast_interruptible(&refch->notifier_wq); | 1316 | nvgpu_cond_broadcast_interruptible(&refch->notifier_wq); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 19ea76cb..ab2d0b7f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -49,6 +49,7 @@ struct nvgpu_cpu_time_correlation_sample; | |||
49 | #include <nvgpu/falcon.h> | 49 | #include <nvgpu/falcon.h> |
50 | #include <nvgpu/pmu.h> | 50 | #include <nvgpu/pmu.h> |
51 | #include <nvgpu/atomic.h> | 51 | #include <nvgpu/atomic.h> |
52 | #include <nvgpu/barrier.h> | ||
52 | 53 | ||
53 | #include "clk_gk20a.h" | 54 | #include "clk_gk20a.h" |
54 | #include "ce2_gk20a.h" | 55 | #include "ce2_gk20a.h" |
@@ -1324,7 +1325,7 @@ static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v) | |||
1324 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | 1325 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); |
1325 | } else { | 1326 | } else { |
1326 | writel_relaxed(v, g->regs + r); | 1327 | writel_relaxed(v, g->regs + r); |
1327 | wmb(); | 1328 | nvgpu_smp_wmb(); |
1328 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v); | 1329 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v); |
1329 | } | 1330 | } |
1330 | } | 1331 | } |
@@ -1351,7 +1352,7 @@ static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v) | |||
1351 | __gk20a_warn_on_no_regs(); | 1352 | __gk20a_warn_on_no_regs(); |
1352 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); | 1353 | gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); |
1353 | } else { | 1354 | } else { |
1354 | wmb(); | 1355 | nvgpu_smp_wmb(); |
1355 | do { | 1356 | do { |
1356 | writel_relaxed(v, g->regs + r); | 1357 | writel_relaxed(v, g->regs + r); |
1357 | } while (readl(g->regs + r) != v); | 1358 | } while (readl(g->regs + r) != v); |
@@ -1365,7 +1366,7 @@ static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v) | |||
1365 | __gk20a_warn_on_no_regs(); | 1366 | __gk20a_warn_on_no_regs(); |
1366 | gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); | 1367 | gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); |
1367 | } else { | 1368 | } else { |
1368 | wmb(); | 1369 | nvgpu_smp_wmb(); |
1369 | writel_relaxed(v, g->bar1 + b); | 1370 | writel_relaxed(v, g->bar1 + b); |
1370 | gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v); | 1371 | gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v); |
1371 | } | 1372 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index cd1d31a5..27442947 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <nvgpu/firmware.h> | 31 | #include <nvgpu/firmware.h> |
32 | #include <nvgpu/enabled.h> | 32 | #include <nvgpu/enabled.h> |
33 | #include <nvgpu/debug.h> | 33 | #include <nvgpu/debug.h> |
34 | #include <nvgpu/barrier.h> | ||
34 | 35 | ||
35 | #include "gk20a.h" | 36 | #include "gk20a.h" |
36 | #include "kind_gk20a.h" | 37 | #include "kind_gk20a.h" |
@@ -554,8 +555,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | |||
554 | gk20a_writel(g, gr_fecs_method_push_r(), | 555 | gk20a_writel(g, gr_fecs_method_push_r(), |
555 | gr_fecs_method_push_adr_f(op.method.addr)); | 556 | gr_fecs_method_push_adr_f(op.method.addr)); |
556 | 557 | ||
557 | /* op.mb.id == 4 cases require waiting for completion on | 558 | /* op.mailbox.id == 4 cases require waiting for completion on |
558 | * for op.mb.id == 0 */ | 559 | * op.mailbox.id == 0 */ |
559 | if (op.mailbox.id == 4) | 560 | if (op.mailbox.id == 4) |
560 | op.mailbox.id = 0; | 561 | op.mailbox.id = 0; |
561 | 562 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index e688c863..8e913f23 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <nvgpu/timers.h> | 21 | #include <nvgpu/timers.h> |
22 | #include <nvgpu/log.h> | 22 | #include <nvgpu/log.h> |
23 | #include <nvgpu/atomic.h> | 23 | #include <nvgpu/atomic.h> |
24 | #include <nvgpu/barrier.h> | ||
24 | 25 | ||
25 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> | 26 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> |
26 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> | 27 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> |
@@ -50,7 +51,7 @@ void channel_gm20b_bind(struct channel_gk20a *c) | |||
50 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & | 51 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & |
51 | ~ccsr_channel_enable_set_f(~0)) | | 52 | ~ccsr_channel_enable_set_f(~0)) | |
52 | ccsr_channel_enable_set_true_f()); | 53 | ccsr_channel_enable_set_true_f()); |
53 | wmb(); | 54 | nvgpu_smp_wmb(); |
54 | nvgpu_atomic_set(&c->bound, true); | 55 | nvgpu_atomic_set(&c->bound, true); |
55 | } | 56 | } |
56 | 57 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/barrier.h b/drivers/gpu/nvgpu/include/nvgpu/barrier.h new file mode 100644 index 00000000..26eec3ed --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/barrier.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | /* This file contains NVGPU_* high-level abstractions for various | ||
18 | * memory-barrier operations available in linux/kernel. Every OS | |
19 | * should provide their own OS specific calls under this common API | ||
20 | */ | ||
21 | |||
22 | #ifndef __NVGPU_BARRIER_H__ | ||
23 | #define __NVGPU_BARRIER_H__ | ||
24 | |||
25 | #ifdef __KERNEL__ | ||
26 | #include <nvgpu/linux/barrier.h> | ||
27 | #endif | ||
28 | |||
29 | #define nvgpu_mb() __nvgpu_mb() | ||
30 | #define nvgpu_rmb() __nvgpu_rmb() | ||
31 | #define nvgpu_wmb() __nvgpu_wmb() | ||
32 | |||
33 | #define nvgpu_smp_mb() __nvgpu_smp_mb() | ||
34 | #define nvgpu_smp_rmb() __nvgpu_smp_rmb() | ||
35 | #define nvgpu_smp_wmb() __nvgpu_smp_wmb() | ||
36 | |||
37 | #define nvgpu_read_barrier_depends() __nvgpu_read_barrier_depends() | ||
38 | #define nvgpu_smp_read_barrier_depends() __nvgpu_smp_read_barrier_depends() | ||
39 | |||
40 | #endif /* __NVGPU_BARRIER_H__ */ | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h b/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h new file mode 100644 index 00000000..e7b83ee8 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __NVGPU_BARRIER_LINUX_H__ | ||
18 | #define __NVGPU_BARRIER_LINUX_H__ | ||
19 | |||
20 | #include <asm/barrier.h> | ||
21 | |||
22 | #define __nvgpu_mb() mb() | ||
23 | #define __nvgpu_rmb() rmb() | ||
24 | #define __nvgpu_wmb() wmb() | ||
25 | |||
26 | #define __nvgpu_smp_mb() smp_mb() | ||
27 | #define __nvgpu_smp_rmb() smp_rmb() | ||
28 | #define __nvgpu_smp_wmb() smp_wmb() | ||
29 | |||
30 | #define __nvgpu_read_barrier_depends() read_barrier_depends() | ||
31 | #define __nvgpu_smp_read_barrier_depends() smp_read_barrier_depends() | ||
32 | |||
33 | #endif /* __NVGPU_BARRIER_LINUX_H__ */ | ||
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index c8519905..3f03e25a 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <nvgpu/dma.h> | 20 | #include <nvgpu/dma.h> |
21 | #include <nvgpu/atomic.h> | 21 | #include <nvgpu/atomic.h> |
22 | #include <nvgpu/bug.h> | 22 | #include <nvgpu/bug.h> |
23 | #include <nvgpu/barrier.h> | ||
23 | 24 | ||
24 | #include "vgpu/vgpu.h" | 25 | #include "vgpu/vgpu.h" |
25 | #include "gk20a/ctxsw_trace_gk20a.h" | 26 | #include "gk20a/ctxsw_trace_gk20a.h" |
@@ -42,7 +43,7 @@ static void vgpu_channel_bind(struct channel_gk20a *ch) | |||
42 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 43 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
43 | WARN_ON(err || msg.ret); | 44 | WARN_ON(err || msg.ret); |
44 | 45 | ||
45 | wmb(); | 46 | nvgpu_smp_wmb(); |
46 | nvgpu_atomic_set(&ch->bound, true); | 47 | nvgpu_atomic_set(&ch->bound, true); |
47 | } | 48 | } |
48 | 49 | ||
@@ -370,7 +371,7 @@ static int vgpu_init_fifo_setup_hw(struct gk20a *g) | |||
370 | v = gk20a_bar1_readl(g, bar1_vaddr); | 371 | v = gk20a_bar1_readl(g, bar1_vaddr); |
371 | 372 | ||
372 | *cpu_vaddr = v1; | 373 | *cpu_vaddr = v1; |
373 | smp_mb(); | 374 | nvgpu_smp_mb(); |
374 | 375 | ||
375 | if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { | 376 | if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { |
376 | nvgpu_err(g, "bar1 broken @ gk20a!"); | 377 | nvgpu_err(g, "bar1 broken @ gk20a!"); |
@@ -728,7 +729,7 @@ static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, | |||
728 | 729 | ||
729 | /* mark channel as faulted */ | 730 | /* mark channel as faulted */ |
730 | ch->has_timedout = true; | 731 | ch->has_timedout = true; |
731 | wmb(); | 732 | nvgpu_smp_wmb(); |
732 | /* unblock pending waits */ | 733 | /* unblock pending waits */ |
733 | nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); | 734 | nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); |
734 | nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); | 735 | nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); |