From 81868a187fa3b217368206f17b19309846e8e7fb Mon Sep 17 00:00:00 2001 From: Debarshi Dutta Date: Fri, 18 Aug 2017 16:22:29 +0530 Subject: gpu: nvgpu: Nvgpu abstraction for linux barriers. construct wrapper nvgpu_* methods to replace mb,rmb,wmb,smp_mb,smp_rmb,smp_wmb,read_barrier_depends and smp_read_barrier_depends. NVGPU-122 Change-Id: I8d24dd70fef5cb0fadaacc15f3ab11531667a0df Signed-off-by: Debarshi Reviewed-on: https://git-master.nvidia.com/r/1541199 Reviewed-by: svccoveritychecker Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Sourab Gupta Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/clk/clk_arb.c | 43 ++++++++++--------- drivers/gpu/nvgpu/common/mm/bitmap_allocator.c | 5 ++- drivers/gpu/nvgpu/common/mm/buddy_allocator.c | 5 ++- drivers/gpu/nvgpu/common/mm/gmmu.c | 13 +++--- drivers/gpu/nvgpu/common/mm/lockless_allocator.c | 5 ++- drivers/gpu/nvgpu/common/pmu/pmu.c | 3 +- drivers/gpu/nvgpu/common/pmu/pmu_pg.c | 3 +- drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 3 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 54 +++++++++++++----------- drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 3 +- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 3 +- drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 7 +-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 5 ++- drivers/gpu/nvgpu/gk20a/gk20a.h | 7 +-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5 ++- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 3 +- drivers/gpu/nvgpu/include/nvgpu/barrier.h | 40 ++++++++++++++++++ drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h | 33 +++++++++++++++ drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 7 +-- 19 files changed, 171 insertions(+), 76 deletions(-) create mode 100644 drivers/gpu/nvgpu/include/nvgpu/barrier.h create mode 100644 drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index b00ecd31..f1de54c6 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -27,6 +27,7 @@ #include #include #include 
+#include #include "gk20a/gk20a.h" #include "clk/clk_arb.h" @@ -386,7 +387,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) goto init_fail; do { /* Check that first run is completed */ - smp_mb(); + nvgpu_smp_mb(); wait_event_interruptible(arb->request_wq, nvgpu_atomic_read(&arb->req_nr)); } while (!nvgpu_atomic_read(&arb->req_nr)); @@ -578,7 +579,7 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8; /* make sure that the initialization of the pool is visible * before the update */ - smp_wmb(); + nvgpu_smp_wmb(); session->target = &session->target_pool[0]; init_llist_head(&session->targets); @@ -706,7 +707,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) table = ACCESS_ONCE(arb->current_vf_table); /* make flag visible when all data has resolved in the tables */ - smp_rmb(); + nvgpu_smp_rmb(); table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] : &arb->vf_table_pool[0]; @@ -980,7 +981,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) } /* make table visible when all data has resolved in the tables */ - smp_wmb(); + nvgpu_smp_wmb(); xchg(&arb->current_vf_table, table); exit_vf_table: @@ -1077,7 +1078,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) &session->target_pool[1] : &session->target_pool[0]; /* Do not reorder pointer */ - smp_rmb(); + nvgpu_smp_rmb(); head = llist_del_all(&session->targets); if (head) { @@ -1102,7 +1103,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) llist_add(&dev->node, &arb->requests); } /* Ensure target is updated before ptr sawp */ - smp_wmb(); + nvgpu_smp_wmb(); xchg(&session->target, target); } @@ -1148,7 +1149,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) if (pstate == VF_POINT_INVALID_PSTATE) { arb->status = -EINVAL; /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); goto exit_arb; } @@ -1175,7 +1176,7 @@ static void 
nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); goto exit_arb; } status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); @@ -1184,7 +1185,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); goto exit_arb; } @@ -1196,7 +1197,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); goto exit_arb; } @@ -1206,7 +1207,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); goto exit_arb; } @@ -1216,7 +1217,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); goto exit_arb; } @@ -1224,7 +1225,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) &arb->actual_pool[1] : &arb->actual_pool[0]; /* do not reorder this pointer */ - smp_rmb(); + nvgpu_smp_rmb(); actual->gpc2clk = gpc2clk_target; actual->mclk = mclk_target; arb->voltuv_actual = voltuv; @@ -1232,7 +1233,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) arb->status = status; /* Make changes visible to other threads */ - smp_wmb(); + nvgpu_smp_wmb(); xchg(&arb->actual, actual); status = nvgpu_lpwr_enable_pg(g, false); @@ -1241,12 +1242,12 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); goto exit_arb; } /* status must be visible before atomic inc */ - smp_wmb(); + nvgpu_smp_wmb(); nvgpu_atomic_inc(&arb->req_nr); /* Unlock pstate change for PG */ @@ -1287,7 +1288,7 @@ static void 
nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) (curr - debug->switch_avg) * (curr - prev_avg); } /* commit changes before exchanging debug pointer */ - smp_wmb(); + nvgpu_smp_wmb(); xchg(&arb->debug, debug); #endif @@ -1687,7 +1688,7 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, do { target = ACCESS_ONCE(session->target); /* no reordering of this pointer */ - smp_rmb(); + nvgpu_smp_rmb(); switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: @@ -1716,7 +1717,7 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, do { actual = ACCESS_ONCE(arb->actual); /* no reordering of this pointer */ - smp_rmb(); + nvgpu_smp_rmb(); switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: @@ -1854,7 +1855,7 @@ static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, table = ACCESS_ONCE(arb->current_vf_table); /* pointer to table can be updated by callback */ - smp_rmb(); + nvgpu_smp_rmb(); if (!table) continue; @@ -2039,7 +2040,7 @@ static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) debug = ACCESS_ONCE(arb->debug); /* Make copy of structure and ensure no reordering */ - smp_rmb(); + nvgpu_smp_rmb(); if (!debug) return -EINVAL; diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c index eae0475a..274e9c93 100644 --- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "bitmap_allocator_priv.h" @@ -40,7 +41,7 @@ static int nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a) struct nvgpu_bitmap_allocator *ba = a->priv; int inited = ba->inited; - rmb(); + nvgpu_smp_rmb(); return inited; } @@ -408,7 +409,7 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, goto fail; } - wmb(); + nvgpu_smp_wmb(); a->inited = true; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c 
b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index 0ef94c10..3e305bb8 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "gk20a/mm_gk20a.h" #include "gk20a/platform_gk20a.h" @@ -1064,7 +1065,7 @@ static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a) struct nvgpu_buddy_allocator *ba = a->priv; int inited = ba->initialized; - rmb(); + nvgpu_smp_rmb(); return inited; } @@ -1289,7 +1290,7 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, if (err) goto fail; - wmb(); + nvgpu_smp_wmb(); a->initialized = 1; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 73dff2c3..7f486d68 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" @@ -164,8 +165,8 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm) return err; /* - * One mb() is done after all mapping operations. Don't need individual - * barriers for each PD write. + * One nvgpu_smp_mb() is done after all mapping operations. Don't need + * individual barriers for each PD write. */ vm->pdb.mem->skip_wmb = true; @@ -259,8 +260,8 @@ static int pd_allocate(struct vm_gk20a *vm, } /* - * One mb() is done after all mapping operations. Don't need individual - * barriers for each PD write. + * One nvgpu_smp_mb() is done after all mapping operations. Don't need + * individual barriers for each PD write. */ pd->mem->skip_wmb = true; @@ -714,7 +715,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, attrs); unmap_gmmu_pages(g, &vm->pdb); - mb(); + nvgpu_smp_mb(); __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? 
"MAP" : "UNMAP"); @@ -983,7 +984,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) * There probably also needs to be a TLB invalidate as well but we leave * that to the caller of this function. */ - wmb(); + nvgpu_smp_wmb(); return 0; } diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c index eeb86095..8f712a14 100644 --- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "lockless_allocator_priv.h" @@ -39,7 +40,7 @@ static int nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a) struct nvgpu_lockless_allocator *pa = a->priv; int inited = pa->inited; - rmb(); + nvgpu_smp_rmb(); return inited; } @@ -198,7 +199,7 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, a->flags = flags; nvgpu_atomic_set(&a->nr_allocs, 0); - wmb(); + nvgpu_smp_wmb(); a->inited = true; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c index 58108722..63597d10 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" @@ -394,7 +395,7 @@ void nvgpu_pmu_state_change(struct gk20a *g, u32 pmu_state, } /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); } static int nvgpu_pg_init_task(void *arg) diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c index 935ae95a..b435f4a7 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" @@ -84,7 +85,7 @@ static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, true); WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); /* make status visible */ - smp_mb(); + nvgpu_smp_mb(); } 
else nvgpu_pmu_state_change(g, PMU_STATE_ELPG_BOOTED, true); diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index f50fec13..3e979ebd 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -27,6 +27,7 @@ #include #include #include +#include static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) { @@ -654,7 +655,7 @@ int gk20a_ce_execute_ops(struct gk20a *g, /* take always the postfence as it is needed for protecting the ce context */ submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; - wmb(); + nvgpu_smp_wmb(); ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, 1, submit_flags, &fence, diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 5f81b441..0c1b06e9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "gk20a.h" #include "ctxsw_trace_gk20a.h" @@ -245,9 +246,9 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) /* * ensure put is read before any subsequent reads. - * see corresponding wmb in gk20a_channel_add_job() + * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job() */ - rmb(); + nvgpu_smp_rmb(); while (tmp_get != put) { job = &ch->joblist.pre_alloc.jobs[tmp_get]; @@ -618,7 +619,7 @@ unbind: /* make sure we catch accesses of unopened channels in case * there's non-refcounted channel pointers hanging around */ ch->g = NULL; - wmb(); + nvgpu_smp_wmb(); /* ALWAYS last */ free_channel(f, ch); @@ -880,7 +881,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, * gk20a_free_channel() */ ch->referenceable = true; nvgpu_atomic_set(&ch->ref_count, 1); - wmb(); + nvgpu_smp_wmb(); return ch; } @@ -993,9 +994,9 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, /* * commit the previous writes before making the entry valid. 
- * see the corresponding rmb() in gk20a_free_priv_cmdbuf(). + * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf(). */ - wmb(); + nvgpu_smp_wmb(); e->valid = true; gk20a_dbg_fn("done"); @@ -1025,9 +1026,10 @@ static int channel_gk20a_alloc_job(struct channel_gk20a *c, /* * ensure all subsequent reads happen after reading get. - * see corresponding wmb in gk20a_channel_clean_up_jobs() + * see corresponding nvgpu_smp_wmb in + * gk20a_channel_clean_up_jobs() */ - rmb(); + nvgpu_smp_rmb(); if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) *job_out = &c->joblist.pre_alloc.jobs[put]; @@ -1137,7 +1139,7 @@ bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) { bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; - rmb(); + nvgpu_smp_rmb(); return pre_alloc_enabled; } @@ -1194,9 +1196,10 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, /* * commit the previous writes before setting the flag. - * see corresponding rmb in channel_gk20a_is_prealloc_enabled() + * see corresponding nvgpu_smp_rmb in + * channel_gk20a_is_prealloc_enabled() */ - wmb(); + nvgpu_smp_wmb(); c->joblist.pre_alloc.enabled = true; return 0; @@ -1218,9 +1221,10 @@ static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) /* * commit the previous writes before disabling the flag. - * see corresponding rmb in channel_gk20a_is_prealloc_enabled() + * see corresponding nvgpu_smp_rmb in + * channel_gk20a_is_prealloc_enabled() */ - wmb(); + nvgpu_smp_wmb(); c->joblist.pre_alloc.enabled = false; } @@ -1741,8 +1745,8 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g) /* * Currently, the only work type is associated with a lock, which deals * with any necessary barriers. If a work type with no locking were - * added, a a wmb() would be needed here. See ..worker_pending() for a - * pair. + * added, a nvgpu_smp_wmb() would be needed here. See + * ..worker_pending() for a pair. 
*/ put = nvgpu_atomic_inc_return(&g->channel_worker.put); @@ -1764,8 +1768,9 @@ static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; /* - * This would be the place for a rmb() pairing a wmb() for a wakeup - * if we had any work with no implicit barriers caused by locking. + * This would be the place for a nvgpu_smp_rmb() pairing + * a nvgpu_smp_wmb() for a wakeup if we had any work with + * no implicit barriers caused by locking. */ return pending; @@ -1939,7 +1944,7 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) if (e->valid) { /* read the entry's valid flag before reading its contents */ - rmb(); + nvgpu_smp_rmb(); if ((q->get != e->off) && e->off != 0) nvgpu_err(g, "requests out-of-order, ch=%d", c->chid); @@ -1984,10 +1989,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, /* * ensure all pending write complete before adding to the list. - * see corresponding rmb in gk20a_channel_clean_up_jobs() & + * see corresponding nvgpu_smp_rmb in + * gk20a_channel_clean_up_jobs() & * gk20a_channel_abort_clean_up() */ - wmb(); + nvgpu_smp_wmb(); channel_gk20a_joblist_add(c, job); if (!pre_alloc_enabled) @@ -2061,10 +2067,10 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, /* * ensure that all subsequent reads occur after checking - * that we have a valid node. see corresponding wmb in + * that we have a valid node. see corresponding nvgpu_smp_wmb in * gk20a_channel_add_job(). */ - rmb(); + nvgpu_smp_rmb(); job = channel_gk20a_joblist_peek(c); channel_gk20a_joblist_unlock(c); @@ -2127,9 +2133,9 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, /* * ensure all pending writes complete before freeing up the job. - * see corresponding rmb in channel_gk20a_alloc_job(). + * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job(). 
*/ - wmb(); + nvgpu_smp_wmb(); channel_gk20a_free_job(c, job); job_finished = 1; diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 546917f1..91c3b206 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -635,7 +636,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, dev->ents[write_idx] = *entry; /* ensure record is written before updating write index */ - smp_wmb(); + nvgpu_smp_wmb(); write_idx++; if (unlikely(write_idx >= hdr->num_ents)) diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 5ee90440..fea3b0fa 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "ctxsw_trace_gk20a.h" #include "fecs_trace_gk20a.h" @@ -370,7 +371,7 @@ int gk20a_fecs_trace_poll(struct gk20a *g) } /* ensure FECS records has been updated before incrementing read index */ - wmb(); + nvgpu_smp_wmb(); gk20a_fecs_trace_set_read_index(g, read); done: diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 8ad24c44..5fa9a0df 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "gk20a.h" #include "channel_gk20a.h" @@ -73,7 +74,7 @@ static inline bool gk20a_fence_is_valid(struct gk20a_fence *f) { bool valid = f->valid; - rmb(); + nvgpu_smp_rmb(); return valid; } @@ -252,7 +253,7 @@ int gk20a_fence_from_semaphore( f->semaphore_wq = semaphore_wq; /* commit previous writes before setting the valid flag */ - wmb(); + nvgpu_smp_wmb(); f->valid = true; return 0; @@ -327,7 +328,7 @@ int gk20a_fence_from_syncpt( f->syncpt_value = value; /* commit previous writes before setting the valid flag */ - wmb(); + 
nvgpu_smp_wmb(); f->valid = true; return 0; diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 47e7d82e..fd249bc9 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "gk20a.h" #include "ctxsw_trace_gk20a.h" @@ -966,7 +967,7 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g) v = gk20a_bar1_readl(g, bar1_vaddr); *cpu_vaddr = v1; - smp_mb(); + nvgpu_smp_mb(); if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { nvgpu_err(g, "bar1 broken @ gk20a: CPU wrote 0x%x, \ @@ -1309,7 +1310,7 @@ static void gk20a_fifo_set_has_timedout_and_wake_up_wqs(struct gk20a *g, if (refch) { /* mark channel as faulted */ refch->has_timedout = true; - wmb(); + nvgpu_smp_wmb(); /* unblock pending waits */ nvgpu_cond_broadcast_interruptible(&refch->semaphore_wq); nvgpu_cond_broadcast_interruptible(&refch->notifier_wq); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 19ea76cb..ab2d0b7f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -49,6 +49,7 @@ struct nvgpu_cpu_time_correlation_sample; #include #include #include +#include #include "clk_gk20a.h" #include "ce2_gk20a.h" @@ -1324,7 +1325,7 @@ static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v) gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); } else { writel_relaxed(v, g->regs + r); - wmb(); + nvgpu_wmb(); gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v); } } @@ -1351,7 +1352,7 @@ static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v) __gk20a_warn_on_no_regs(); gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); } else { - wmb(); + nvgpu_wmb(); do { writel_relaxed(v, g->regs + r); } while (readl(g->regs + r) != v); @@ -1365,7 +1366,7 @@ static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v) __gk20a_warn_on_no_regs(); gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); } else { - 
wmb(); + nvgpu_wmb(); writel_relaxed(v, g->bar1 + b); gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v); } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index cd1d31a5..27442947 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "gk20a.h" #include "kind_gk20a.h" @@ -554,8 +555,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, gk20a_writel(g, gr_fecs_method_push_r(), gr_fecs_method_push_adr_f(op.method.addr)); - /* op.mb.id == 4 cases require waiting for completion on - * for op.mb.id == 0 */ + /* op.mailbox.id == 4 cases require waiting for completion on + * for op.mailbox.id == 0 */ if (op.mailbox.id == 4) op.mailbox.id = 0; diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index e688c863..8e913f23 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -50,7 +51,7 @@ void channel_gm20b_bind(struct channel_gk20a *c) (gk20a_readl(g, ccsr_channel_r(c->chid)) & ~ccsr_channel_enable_set_f(~0)) | ccsr_channel_enable_set_true_f()); - wmb(); + nvgpu_smp_wmb(); nvgpu_atomic_set(&c->bound, true); } diff --git a/drivers/gpu/nvgpu/include/nvgpu/barrier.h b/drivers/gpu/nvgpu/include/nvgpu/barrier.h new file mode 100644 index 00000000..26eec3ed --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/barrier.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* This file contains nvgpu_* high-level abstractions for various + * memory-barrier operations available in linux/kernel. Every OS + * should provide its own OS-specific calls under this common API + */ + +#ifndef __NVGPU_BARRIER_H__ +#define __NVGPU_BARRIER_H__ + +#ifdef __KERNEL__ +#include +#endif + +#define nvgpu_mb() __nvgpu_mb() +#define nvgpu_rmb() __nvgpu_rmb() +#define nvgpu_wmb() __nvgpu_wmb() + +#define nvgpu_smp_mb() __nvgpu_smp_mb() +#define nvgpu_smp_rmb() __nvgpu_smp_rmb() +#define nvgpu_smp_wmb() __nvgpu_smp_wmb() + +#define nvgpu_read_barrier_depends() __nvgpu_read_barrier_depends() +#define nvgpu_smp_read_barrier_depends() __nvgpu_smp_read_barrier_depends() + +#endif /* __NVGPU_BARRIER_H__ */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h b/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h new file mode 100644 index 00000000..e7b83ee8 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/barrier.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __NVGPU_BARRIER_LINUX_H__ +#define __NVGPU_BARRIER_LINUX_H__ + +#include + +#define __nvgpu_mb() mb() +#define __nvgpu_rmb() rmb() +#define __nvgpu_wmb() wmb() + +#define __nvgpu_smp_mb() smp_mb() +#define __nvgpu_smp_rmb() smp_rmb() +#define __nvgpu_smp_wmb() smp_wmb() + +#define __nvgpu_read_barrier_depends() read_barrier_depends() +#define __nvgpu_smp_read_barrier_depends() smp_read_barrier_depends() + +#endif /* __NVGPU_BARRIER_LINUX_H__ */ diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index c8519905..3f03e25a 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "vgpu/vgpu.h" #include "gk20a/ctxsw_trace_gk20a.h" @@ -42,7 +43,7 @@ static void vgpu_channel_bind(struct channel_gk20a *ch) err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); WARN_ON(err || msg.ret); - wmb(); + nvgpu_smp_wmb(); nvgpu_atomic_set(&ch->bound, true); } @@ -370,7 +371,7 @@ static int vgpu_init_fifo_setup_hw(struct gk20a *g) v = gk20a_bar1_readl(g, bar1_vaddr); *cpu_vaddr = v1; - smp_mb(); + nvgpu_smp_mb(); if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) { nvgpu_err(g, "bar1 broken @ gk20a!"); @@ -728,7 +729,7 @@ static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, /* mark channel as faulted */ ch->has_timedout = true; - wmb(); + nvgpu_smp_wmb(); /* unblock pending waits */ nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); -- cgit v1.2.2