-rw-r--r--  drivers/gpu/nvgpu/clk/clk_arb.c | 50
-rw-r--r--  drivers/gpu/nvgpu/clk/clk_mclk.c | 16
-rw-r--r--  drivers/gpu/nvgpu/clk/clk_mclk.h | 8
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c | 4
-rw-r--r--  drivers/gpu/nvgpu/common/nvgpu_common.c | 14
-rw-r--r--  drivers/gpu/nvgpu/common/semaphore.c | 18
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 50
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.h | 4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 46
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.h | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 220
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 34
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/clk_gk20a.c | 38
-rw-r--r--  drivers/gpu/nvgpu/gk20a/clk_gk20a.h | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 24
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 28
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 52
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 134
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h | 4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 32
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 50
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c | 30
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h | 20
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 64
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_common.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 184
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a.h | 4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 54
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sched_gk20a.c | 80
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sched_gk20a.h | 8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 11
-rw-r--r--  drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 40
-rw-r--r--  drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 4
-rw-r--r--  drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 16
-rw-r--r--  drivers/gpu/nvgpu/gm20b/clk_gm20b.c | 72
-rw-r--r--  drivers/gpu/nvgpu/gm20b/clk_gm20b.h | 4
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 12
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 4
-rw-r--r--  drivers/gpu/nvgpu/gp106/clk_gp106.c | 8
-rw-r--r--  drivers/gpu/nvgpu/gp106/clk_gp106.h | 4
-rw-r--r--  drivers/gpu/nvgpu/gp106/sec2_gp106.c | 12
-rw-r--r--  drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 12
-rw-r--r--  drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 4
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/allocator.h | 9
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 4
-rw-r--r--  drivers/gpu/nvgpu/lpwr/lpwr.c | 8
-rw-r--r--  drivers/gpu/nvgpu/pstate/pstate.c | 4
-rw-r--r--  drivers/gpu/nvgpu/pstate/pstate.h | 4
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 18
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 12
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 6
-rw-r--r--  drivers/gpu/nvgpu/vgpu/vgpu.c | 8
60 files changed, 820 insertions(+), 816 deletions(-)
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 9232c3dc..062e4e2b 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -18,7 +18,7 @@
18#include <linux/anon_inodes.h> 18#include <linux/anon_inodes.h>
19#include <linux/nvgpu.h> 19#include <linux/nvgpu.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/spinlock.h> 21#include <nvgpu/lock.h>
22#include <linux/rculist.h> 22#include <linux/rculist.h>
23#include <linux/llist.h> 23#include <linux/llist.h>
24#include "clk/clk_arb.h" 24#include "clk/clk_arb.h"
@@ -139,10 +139,10 @@ struct nvgpu_clk_arb_target {
139}; 139};
140 140
141struct nvgpu_clk_arb { 141struct nvgpu_clk_arb {
142 spinlock_t sessions_lock; 142 struct nvgpu_spinlock sessions_lock;
143 spinlock_t users_lock; 143 struct nvgpu_spinlock users_lock;
144 144
145 struct mutex pstate_lock; 145 struct nvgpu_mutex pstate_lock;
146 struct list_head users; 146 struct list_head users;
147 struct list_head sessions; 147 struct list_head sessions;
148 struct llist_head requests; 148 struct llist_head requests;
@@ -308,9 +308,9 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
308 g->clk_arb = arb; 308 g->clk_arb = arb;
309 arb->g = g; 309 arb->g = g;
310 310
311 mutex_init(&arb->pstate_lock); 311 nvgpu_mutex_init(&arb->pstate_lock);
312 spin_lock_init(&arb->sessions_lock); 312 nvgpu_spinlock_init(&arb->sessions_lock);
313 spin_lock_init(&arb->users_lock); 313 nvgpu_spinlock_init(&arb->users_lock);
314 314
315 err = g->ops.clk_arb.get_arbiter_clk_default(g, 315 err = g->ops.clk_arb.get_arbiter_clk_default(g,
316 CTRL_CLK_DOMAIN_MCLK, &default_mhz); 316 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
@@ -546,9 +546,9 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
546 546
547 init_llist_head(&session->targets); 547 init_llist_head(&session->targets);
548 548
549 spin_lock(&arb->sessions_lock); 549 nvgpu_spinlock_acquire(&arb->sessions_lock);
550 list_add_tail_rcu(&session->link, &arb->sessions); 550 list_add_tail_rcu(&session->link, &arb->sessions);
551 spin_unlock(&arb->sessions_lock); 551 nvgpu_spinlock_release(&arb->sessions_lock);
552 552
553 *_session = session; 553 *_session = session;
554 554
@@ -573,9 +573,9 @@ static void nvgpu_clk_arb_free_session(struct kref *refcount)
573 573
574 gk20a_dbg_fn(""); 574 gk20a_dbg_fn("");
575 575
576 spin_lock(&arb->sessions_lock); 576 nvgpu_spinlock_acquire(&arb->sessions_lock);
577 list_del_rcu(&session->link); 577 list_del_rcu(&session->link);
578 spin_unlock(&arb->sessions_lock); 578 nvgpu_spinlock_release(&arb->sessions_lock);
579 579
580 head = llist_del_all(&session->targets); 580 head = llist_del_all(&session->targets);
581 llist_for_each_entry_safe(dev, tmp, head, node) { 581 llist_for_each_entry_safe(dev, tmp, head, node) {
@@ -622,9 +622,9 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
622 622
623 dev->arb_queue_head = atomic_read(&arb->notification_queue.head); 623 dev->arb_queue_head = atomic_read(&arb->notification_queue.head);
624 624
625 spin_lock(&arb->users_lock); 625 nvgpu_spinlock_acquire(&arb->users_lock);
626 list_add_tail_rcu(&dev->link, &arb->users); 626 list_add_tail_rcu(&dev->link, &arb->users);
627 spin_unlock(&arb->users_lock); 627 nvgpu_spinlock_release(&arb->users_lock);
628 628
629 *event_fd = fd; 629 *event_fd = fd;
630 630
@@ -1128,13 +1128,13 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1128 /* Program clocks */ 1128 /* Program clocks */
1129 /* A change in both mclk of gpc2clk may require a change in voltage */ 1129 /* A change in both mclk of gpc2clk may require a change in voltage */
1130 1130
1131 mutex_lock(&arb->pstate_lock); 1131 nvgpu_mutex_acquire(&arb->pstate_lock);
1132 status = nvgpu_lpwr_disable_pg(g, false); 1132 status = nvgpu_lpwr_disable_pg(g, false);
1133 1133
1134 status = clk_pmu_freq_controller_load(g, false); 1134 status = clk_pmu_freq_controller_load(g, false);
1135 if (status < 0) { 1135 if (status < 0) {
1136 arb->status = status; 1136 arb->status = status;
1137 mutex_unlock(&arb->pstate_lock); 1137 nvgpu_mutex_release(&arb->pstate_lock);
1138 1138
1139 /* make status visible */ 1139 /* make status visible */
1140 smp_mb(); 1140 smp_mb();
@@ -1143,7 +1143,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1143 status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); 1143 status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
1144 if (status < 0) { 1144 if (status < 0) {
1145 arb->status = status; 1145 arb->status = status;
1146 mutex_unlock(&arb->pstate_lock); 1146 nvgpu_mutex_release(&arb->pstate_lock);
1147 1147
1148 /* make status visible */ 1148 /* make status visible */
1149 smp_mb(); 1149 smp_mb();
@@ -1155,7 +1155,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1155 voltuv_sram); 1155 voltuv_sram);
1156 if (status < 0) { 1156 if (status < 0) {
1157 arb->status = status; 1157 arb->status = status;
1158 mutex_unlock(&arb->pstate_lock); 1158 nvgpu_mutex_release(&arb->pstate_lock);
1159 1159
1160 /* make status visible */ 1160 /* make status visible */
1161 smp_mb(); 1161 smp_mb();
@@ -1165,7 +1165,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1165 status = clk_pmu_freq_controller_load(g, true); 1165 status = clk_pmu_freq_controller_load(g, true);
1166 if (status < 0) { 1166 if (status < 0) {
1167 arb->status = status; 1167 arb->status = status;
1168 mutex_unlock(&arb->pstate_lock); 1168 nvgpu_mutex_release(&arb->pstate_lock);
1169 1169
1170 /* make status visible */ 1170 /* make status visible */
1171 smp_mb(); 1171 smp_mb();
@@ -1175,7 +1175,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1175 status = nvgpu_lwpr_mclk_change(g, pstate); 1175 status = nvgpu_lwpr_mclk_change(g, pstate);
1176 if (status < 0) { 1176 if (status < 0) {
1177 arb->status = status; 1177 arb->status = status;
1178 mutex_unlock(&arb->pstate_lock); 1178 nvgpu_mutex_release(&arb->pstate_lock);
1179 1179
1180 /* make status visible */ 1180 /* make status visible */
1181 smp_mb(); 1181 smp_mb();
@@ -1200,7 +1200,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1200 status = nvgpu_lpwr_enable_pg(g, false); 1200 status = nvgpu_lpwr_enable_pg(g, false);
1201 if (status < 0) { 1201 if (status < 0) {
1202 arb->status = status; 1202 arb->status = status;
1203 mutex_unlock(&arb->pstate_lock); 1203 nvgpu_mutex_release(&arb->pstate_lock);
1204 1204
1205 /* make status visible */ 1205 /* make status visible */
1206 smp_mb(); 1206 smp_mb();
@@ -1212,7 +1212,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1212 atomic_inc(&arb->req_nr); 1212 atomic_inc(&arb->req_nr);
1213 1213
1214 /* Unlock pstate change for PG */ 1214 /* Unlock pstate change for PG */
1215 mutex_unlock(&arb->pstate_lock); 1215 nvgpu_mutex_release(&arb->pstate_lock);
1216 1216
1217 /* VF Update complete */ 1217 /* VF Update complete */
1218 nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE)); 1218 nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE));
@@ -1589,9 +1589,9 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
1589 1589
1590 gk20a_dbg_fn(""); 1590 gk20a_dbg_fn("");
1591 1591
1592 spin_lock(&arb->users_lock); 1592 nvgpu_spinlock_acquire(&arb->users_lock);
1593 list_del_rcu(&dev->link); 1593 list_del_rcu(&dev->link);
1594 spin_unlock(&arb->users_lock); 1594 nvgpu_spinlock_release(&arb->users_lock);
1595 1595
1596 synchronize_rcu(); 1596 synchronize_rcu();
1597 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 1597 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
@@ -2000,9 +2000,9 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
2000 struct nvgpu_clk_arb *arb = g->clk_arb; 2000 struct nvgpu_clk_arb *arb = g->clk_arb;
2001 2001
2002 if (lock) 2002 if (lock)
2003 mutex_lock(&arb->pstate_lock); 2003 nvgpu_mutex_acquire(&arb->pstate_lock);
2004 else 2004 else
2005 mutex_unlock(&arb->pstate_lock); 2005 nvgpu_mutex_release(&arb->pstate_lock);
2006} 2006}
2007 2007
2008#ifdef CONFIG_DEBUG_FS 2008#ifdef CONFIG_DEBUG_FS
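
The clk_arb.c hunks above show the substitution applied throughout this change: raw Linux primitives (spinlock_t, struct mutex, spin_lock()/mutex_lock() and friends) are replaced by the driver's own nvgpu_spinlock and nvgpu_mutex types with acquire/release calls, pulled in from <nvgpu/lock.h> instead of <linux/spinlock.h>. The new header itself is not part of this diff, so the sketch below is only a plausible Linux-side implementation; the struct layouts and inline bodies are assumptions, and only the type and function names are taken from the patch.

```c
/* Hypothetical sketch of <nvgpu/lock.h> for a Linux build; the real header
 * may differ -- only the names below appear in the patch itself. */
#include <linux/mutex.h>
#include <linux/spinlock.h>

struct nvgpu_mutex {
	struct mutex mutex;		/* assumed: thin wrapper over the kernel mutex */
};

struct nvgpu_spinlock {
	spinlock_t spinlock;		/* assumed: thin wrapper over spinlock_t */
};

struct nvgpu_raw_spinlock {
	raw_spinlock_t spinlock;	/* assumed: thin wrapper over raw_spinlock_t */
};

static inline void nvgpu_mutex_init(struct nvgpu_mutex *m)
{
	mutex_init(&m->mutex);
}

static inline void nvgpu_mutex_acquire(struct nvgpu_mutex *m)
{
	mutex_lock(&m->mutex);
}

static inline void nvgpu_mutex_release(struct nvgpu_mutex *m)
{
	mutex_unlock(&m->mutex);
}

static inline int nvgpu_mutex_tryacquire(struct nvgpu_mutex *m)
{
	return mutex_trylock(&m->mutex);
}

static inline void nvgpu_mutex_destroy(struct nvgpu_mutex *m)
{
	mutex_destroy(&m->mutex);
}

static inline void nvgpu_spinlock_init(struct nvgpu_spinlock *s)
{
	spin_lock_init(&s->spinlock);
}

static inline void nvgpu_spinlock_acquire(struct nvgpu_spinlock *s)
{
	spin_lock(&s->spinlock);
}

static inline void nvgpu_spinlock_release(struct nvgpu_spinlock *s)
{
	spin_unlock(&s->spinlock);
}

static inline void nvgpu_raw_spinlock_init(struct nvgpu_raw_spinlock *s)
{
	raw_spin_lock_init(&s->spinlock);
}
```

Keeping the wrappers this thin would preserve the existing locking semantics while letting non-Linux builds supply their own definitions behind the same names.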
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index 815f55ba..c2e9b35c 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -2185,8 +2185,8 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
2185 2185
2186 mclk = &g->clk_pmu.clk_mclk; 2186 mclk = &g->clk_pmu.clk_mclk;
2187 2187
2188 mutex_init(&mclk->mclk_lock); 2188 nvgpu_mutex_init(&mclk->mclk_lock);
2189 mutex_init(&mclk->data_lock); 2189 nvgpu_mutex_init(&mclk->data_lock);
2190 2190
2191 /* FBPA gain WAR */ 2191 /* FBPA gain WAR */
2192 gk20a_writel(g, fb_fbpa_fbio_iref_byte_rx_ctrl_r(), 0x22222222); 2192 gk20a_writel(g, fb_fbpa_fbio_iref_byte_rx_ctrl_r(), 0x22222222);
@@ -2257,7 +2257,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
2257 2257
2258 mclk = &g->clk_pmu.clk_mclk; 2258 mclk = &g->clk_pmu.clk_mclk;
2259 2259
2260 mutex_lock(&mclk->mclk_lock); 2260 nvgpu_mutex_acquire(&mclk->mclk_lock);
2261 2261
2262 if (!mclk->init) 2262 if (!mclk->init)
2263 goto exit_status; 2263 goto exit_status;
@@ -2364,7 +2364,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
2364#ifdef CONFIG_DEBUG_FS 2364#ifdef CONFIG_DEBUG_FS
2365 g->ops.read_ptimer(g, &t1); 2365 g->ops.read_ptimer(g, &t1);
2366 2366
2367 mutex_lock(&mclk->data_lock); 2367 nvgpu_mutex_acquire(&mclk->data_lock);
2368 mclk->switch_num++; 2368 mclk->switch_num++;
2369 2369
2370 if (mclk->switch_num == 1) { 2370 if (mclk->switch_num == 1) {
@@ -2387,11 +2387,11 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
2387 mclk->switch_std += 2387 mclk->switch_std +=
2388 (curr - mclk->switch_avg) * (curr - prev_avg); 2388 (curr - mclk->switch_avg) * (curr - prev_avg);
2389 } 2389 }
2390 mutex_unlock(&mclk->data_lock); 2390 nvgpu_mutex_release(&mclk->data_lock);
2391#endif 2391#endif
2392exit_status: 2392exit_status:
2393 2393
2394 mutex_unlock(&mclk->mclk_lock); 2394 nvgpu_mutex_release(&mclk->mclk_lock);
2395 return status; 2395 return status;
2396} 2396}
2397 2397
@@ -2429,13 +2429,13 @@ static int mclk_switch_stats_show(struct seq_file *s, void *unused)
2429 mclk = &g->clk_pmu.clk_mclk; 2429 mclk = &g->clk_pmu.clk_mclk;
2430 2430
2431 /* Make copy of structure to reduce time with lock held */ 2431 /* Make copy of structure to reduce time with lock held */
2432 mutex_lock(&mclk->data_lock); 2432 nvgpu_mutex_acquire(&mclk->data_lock);
2433 std = mclk->switch_std; 2433 std = mclk->switch_std;
2434 avg = mclk->switch_avg; 2434 avg = mclk->switch_avg;
2435 max = mclk->switch_max; 2435 max = mclk->switch_max;
2436 min = mclk->switch_min; 2436 min = mclk->switch_min;
2437 num = mclk->switch_num; 2437 num = mclk->switch_num;
2438 mutex_unlock(&mclk->data_lock); 2438 nvgpu_mutex_release(&mclk->data_lock);
2439 2439
2440 tmp = std; 2440 tmp = std;
2441 do_div(tmp, num); 2441 do_div(tmp, num);
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h
index cb7f0de0..731f289d 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.h
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.h
@@ -1,5 +1,5 @@
1/* 1/*
2* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 2* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3* 3*
4* This program is free software; you can redistribute it and/or modify it 4* This program is free software; you can redistribute it and/or modify it
5* under the terms and conditions of the GNU General Public License, 5* under the terms and conditions of the GNU General Public License,
@@ -14,7 +14,7 @@
14#ifndef _CLKMCLK_H_ 14#ifndef _CLKMCLK_H_
15#define _CLKMCLK_H_ 15#define _CLKMCLK_H_
16 16
17#include <linux/mutex.h> 17#include <nvgpu/lock.h>
18 18
19enum gk20a_mclk_speed { 19enum gk20a_mclk_speed {
20 gk20a_mclk_low_speed, 20 gk20a_mclk_low_speed,
@@ -24,8 +24,8 @@ enum gk20a_mclk_speed {
24 24
25struct clk_mclk_state { 25struct clk_mclk_state {
26 enum gk20a_mclk_speed speed; 26 enum gk20a_mclk_speed speed;
27 struct mutex mclk_lock; 27 struct nvgpu_mutex mclk_lock;
28 struct mutex data_lock; 28 struct nvgpu_mutex data_lock;
29 29
30 u16 p5_min; 30 u16 p5_min;
31 u16 p0_min; 31 u16 p0_min;
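
clk_mclk.h shows the header side of the conversion: the <linux/mutex.h> include becomes <nvgpu/lock.h>, embedded locks change type from struct mutex to struct nvgpu_mutex, and clk_mclk.c switches the matching init and acquire/release calls. A condensed, hypothetical example of the resulting pattern follows; the struct and function names are made up for illustration, and only the nvgpu_mutex API comes from the patch.

```c
#include <linux/types.h>
#include <nvgpu/lock.h>

/* hypothetical state object protected by an embedded nvgpu_mutex */
struct demo_state {
	struct nvgpu_mutex data_lock;
	u64 switch_num;
	u64 switch_avg;
};

static void demo_state_init(struct demo_state *s)
{
	nvgpu_mutex_init(&s->data_lock);	/* replaces mutex_init() */
	s->switch_num = 0;
	s->switch_avg = 0;
}

static void demo_state_record(struct demo_state *s, u64 sample)
{
	nvgpu_mutex_acquire(&s->data_lock);	/* replaces mutex_lock() */
	s->switch_num++;
	s->switch_avg += sample;
	nvgpu_mutex_release(&s->data_lock);	/* replaces mutex_unlock() */
}
```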
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index cf8c4569..20209efc 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * gk20a allocator 2 * gk20a allocator
3 * 3 *
4 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -140,7 +140,7 @@ int __nvgpu_alloc_common_init(struct nvgpu_allocator *a,
140 a->priv = priv; 140 a->priv = priv;
141 a->debug = dbg; 141 a->debug = dbg;
142 142
143 mutex_init(&a->lock); 143 nvgpu_mutex_init(&a->lock);
144 144
145 strlcpy(a->name, name, sizeof(a->name)); 145 strlcpy(a->name, name, sizeof(a->name));
146 146
diff --git a/drivers/gpu/nvgpu/common/nvgpu_common.c b/drivers/gpu/nvgpu/common/nvgpu_common.c
index 80f1cca0..6b5cfa55 100644
--- a/drivers/gpu/nvgpu/common/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/common/nvgpu_common.c
@@ -34,13 +34,13 @@ static void nvgpu_init_vars(struct gk20a *g)
34 34
35 init_rwsem(&g->busy_lock); 35 init_rwsem(&g->busy_lock);
36 36
37 spin_lock_init(&g->mc_enable_lock); 37 nvgpu_spinlock_init(&g->mc_enable_lock);
38 38
39 mutex_init(&platform->railgate_lock); 39 nvgpu_mutex_init(&platform->railgate_lock);
40 mutex_init(&g->dbg_sessions_lock); 40 nvgpu_mutex_init(&g->dbg_sessions_lock);
41 mutex_init(&g->client_lock); 41 nvgpu_mutex_init(&g->client_lock);
42 mutex_init(&g->ch_wdt_lock); 42 nvgpu_mutex_init(&g->ch_wdt_lock);
43 mutex_init(&g->poweroff_lock); 43 nvgpu_mutex_init(&g->poweroff_lock);
44 44
45 g->regs_saved = g->regs; 45 g->regs_saved = g->regs;
46 g->bar1_saved = g->bar1; 46 g->bar1_saved = g->bar1;
@@ -52,7 +52,7 @@ static void nvgpu_init_vars(struct gk20a *g)
52 dma_set_max_seg_size(g->dev, UINT_MAX); 52 dma_set_max_seg_size(g->dev, UINT_MAX);
53 53
54 INIT_LIST_HEAD(&g->pending_sema_waits); 54 INIT_LIST_HEAD(&g->pending_sema_waits);
55 raw_spin_lock_init(&g->pending_sema_waits_lock); 55 nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
56} 56}
57 57
58static void nvgpu_init_timeout(struct gk20a *g) 58static void nvgpu_init_timeout(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 4bf8695d..919f26ec 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -24,13 +24,13 @@
24#define __lock_sema_sea(s) \ 24#define __lock_sema_sea(s) \
25 do { \ 25 do { \
26 gpu_sema_verbose_dbg("Acquiring sema lock..."); \ 26 gpu_sema_verbose_dbg("Acquiring sema lock..."); \
27 mutex_lock(&s->sea_lock); \ 27 nvgpu_mutex_acquire(&s->sea_lock); \
28 gpu_sema_verbose_dbg("Sema lock aquried!"); \ 28 gpu_sema_verbose_dbg("Sema lock aquried!"); \
29 } while (0) 29 } while (0)
30 30
31#define __unlock_sema_sea(s) \ 31#define __unlock_sema_sea(s) \
32 do { \ 32 do { \
33 mutex_unlock(&s->sea_lock); \ 33 nvgpu_mutex_release(&s->sea_lock); \
34 gpu_sema_verbose_dbg("Released sema lock"); \ 34 gpu_sema_verbose_dbg("Released sema lock"); \
35 } while (0) 35 } while (0)
36 36
@@ -81,7 +81,7 @@ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g)
81 g->sema_sea->page_count = 0; 81 g->sema_sea->page_count = 0;
82 g->sema_sea->gk20a = g; 82 g->sema_sea->gk20a = g;
83 INIT_LIST_HEAD(&g->sema_sea->pool_list); 83 INIT_LIST_HEAD(&g->sema_sea->pool_list);
84 mutex_init(&g->sema_sea->sea_lock); 84 nvgpu_mutex_init(&g->sema_sea->sea_lock);
85 85
86 if (__nvgpu_semaphore_sea_grow(g->sema_sea)) 86 if (__nvgpu_semaphore_sea_grow(g->sema_sea))
87 goto cleanup; 87 goto cleanup;
@@ -138,7 +138,7 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
138 p->sema_sea = sea; 138 p->sema_sea = sea;
139 INIT_LIST_HEAD(&p->hw_semas); 139 INIT_LIST_HEAD(&p->hw_semas);
140 kref_init(&p->ref); 140 kref_init(&p->ref);
141 mutex_init(&p->pool_lock); 141 nvgpu_mutex_init(&p->pool_lock);
142 142
143 sea->page_count++; 143 sea->page_count++;
144 list_add(&p->pool_list_entry, &sea->pool_list); 144 list_add(&p->pool_list_entry, &sea->pool_list);
@@ -344,7 +344,7 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
344 344
345 BUG_ON(!p); 345 BUG_ON(!p);
346 346
347 mutex_lock(&p->pool_lock); 347 nvgpu_mutex_acquire(&p->pool_lock);
348 348
349 /* Find an available HW semaphore. */ 349 /* Find an available HW semaphore. */
350 hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced, 350 hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
@@ -371,14 +371,14 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
371 371
372 list_add(&hw_sema->hw_sema_list, &p->hw_semas); 372 list_add(&hw_sema->hw_sema_list, &p->hw_semas);
373 373
374 mutex_unlock(&p->pool_lock); 374 nvgpu_mutex_release(&p->pool_lock);
375 375
376 return 0; 376 return 0;
377 377
378fail_free_idx: 378fail_free_idx:
379 clear_bit(hw_sema_idx, p->semas_alloced); 379 clear_bit(hw_sema_idx, p->semas_alloced);
380fail: 380fail:
381 mutex_unlock(&p->pool_lock); 381 nvgpu_mutex_release(&p->pool_lock);
382 return ret; 382 return ret;
383} 383}
384 384
@@ -391,7 +391,7 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch)
391 391
392 BUG_ON(!p); 392 BUG_ON(!p);
393 393
394 mutex_lock(&p->pool_lock); 394 nvgpu_mutex_acquire(&p->pool_lock);
395 395
396 clear_bit(ch->hw_sema->idx, p->semas_alloced); 396 clear_bit(ch->hw_sema->idx, p->semas_alloced);
397 397
@@ -400,7 +400,7 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch)
400 kfree(ch->hw_sema); 400 kfree(ch->hw_sema);
401 ch->hw_sema = NULL; 401 ch->hw_sema = NULL;
402 402
403 mutex_unlock(&p->pool_lock); 403 nvgpu_mutex_release(&p->pool_lock);
404} 404}
405 405
406/* 406/*
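
In semaphore.c the sea and pool locks move to nvgpu_mutex as well; note how __nvgpu_init_hw_sema keeps the usual kernel error-path discipline, releasing p->pool_lock on the failure labels as well as on success. A minimal sketch of that acquire/cleanup/release shape, with hypothetical names (only the nvgpu_mutex calls come from the patch):

```c
#include <linux/bitmap.h>
#include <linux/errno.h>
#include <nvgpu/lock.h>

#define DEMO_NUM_SLOTS 64

/* hypothetical pool with a bitmap of slots guarded by pool_lock */
struct demo_pool {
	struct nvgpu_mutex pool_lock;
	DECLARE_BITMAP(slots, DEMO_NUM_SLOTS);
};

/* returns the allocated slot index, or -ENOSPC if the pool is full */
static int demo_alloc_slot(struct demo_pool *p)
{
	int idx;

	nvgpu_mutex_acquire(&p->pool_lock);

	idx = find_first_zero_bit(p->slots, DEMO_NUM_SLOTS);
	if (idx < DEMO_NUM_SLOTS)
		set_bit(idx, p->slots);
	else
		idx = -ENOSPC;

	nvgpu_mutex_release(&p->pool_lock);	/* every exit path unlocks */
	return idx;
}
```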
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 2a9ad40d..d43bc93f 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -101,9 +101,9 @@ __acquires(&cde_app->mutex)
101 return; 101 return;
102 102
103 if (wait_finish) { 103 if (wait_finish) {
104 mutex_unlock(&cde_app->mutex); 104 nvgpu_mutex_release(&cde_app->mutex);
105 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work); 105 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
106 mutex_lock(&cde_app->mutex); 106 nvgpu_mutex_acquire(&cde_app->mutex);
107 } else { 107 } else {
108 cancel_delayed_work(&cde_ctx->ctx_deleter_work); 108 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
109 } 109 }
@@ -152,9 +152,9 @@ __releases(&cde_app->mutex)
152 if (!cde_app->initialised) 152 if (!cde_app->initialised)
153 return; 153 return;
154 154
155 mutex_lock(&cde_app->mutex); 155 nvgpu_mutex_acquire(&cde_app->mutex);
156 gk20a_cde_stop(g); 156 gk20a_cde_stop(g);
157 mutex_unlock(&cde_app->mutex); 157 nvgpu_mutex_release(&cde_app->mutex);
158} 158}
159 159
160void gk20a_cde_suspend(struct gk20a *g) 160void gk20a_cde_suspend(struct gk20a *g)
@@ -167,7 +167,7 @@ __releases(&cde_app->mutex)
167 if (!cde_app->initialised) 167 if (!cde_app->initialised)
168 return; 168 return;
169 169
170 mutex_lock(&cde_app->mutex); 170 nvgpu_mutex_acquire(&cde_app->mutex);
171 171
172 list_for_each_entry_safe(cde_ctx, cde_ctx_save, 172 list_for_each_entry_safe(cde_ctx, cde_ctx_save,
173 &cde_app->free_contexts, list) { 173 &cde_app->free_contexts, list) {
@@ -179,7 +179,7 @@ __releases(&cde_app->mutex)
179 gk20a_cde_cancel_deleter(cde_ctx, false); 179 gk20a_cde_cancel_deleter(cde_ctx, false);
180 } 180 }
181 181
182 mutex_unlock(&cde_app->mutex); 182 nvgpu_mutex_release(&cde_app->mutex);
183 183
184} 184}
185 185
@@ -739,7 +739,7 @@ __releases(&cde_app->mutex)
739 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); 739 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
740 trace_gk20a_cde_release(cde_ctx); 740 trace_gk20a_cde_release(cde_ctx);
741 741
742 mutex_lock(&cde_app->mutex); 742 nvgpu_mutex_acquire(&cde_app->mutex);
743 743
744 if (cde_ctx->in_use) { 744 if (cde_ctx->in_use) {
745 cde_ctx->in_use = false; 745 cde_ctx->in_use = false;
@@ -749,7 +749,7 @@ __releases(&cde_app->mutex)
749 gk20a_dbg_info("double release cde context %p", cde_ctx); 749 gk20a_dbg_info("double release cde context %p", cde_ctx);
750 } 750 }
751 751
752 mutex_unlock(&cde_app->mutex); 752 nvgpu_mutex_release(&cde_app->mutex);
753} 753}
754 754
755static void gk20a_cde_ctx_deleter_fn(struct work_struct *work) 755static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
@@ -779,7 +779,7 @@ __releases(&cde_app->mutex)
779 return; 779 return;
780 } 780 }
781 781
782 mutex_lock(&cde_app->mutex); 782 nvgpu_mutex_acquire(&cde_app->mutex);
783 if (cde_ctx->in_use || !cde_app->initialised) { 783 if (cde_ctx->in_use || !cde_app->initialised) {
784 gk20a_dbg(gpu_dbg_cde_ctx, 784 gk20a_dbg(gpu_dbg_cde_ctx,
785 "cde: context use raced, not deleting %p", 785 "cde: context use raced, not deleting %p",
@@ -797,7 +797,7 @@ __releases(&cde_app->mutex)
797 cde_app->ctx_count_top); 797 cde_app->ctx_count_top);
798 798
799out: 799out:
800 mutex_unlock(&cde_app->mutex); 800 nvgpu_mutex_release(&cde_app->mutex);
801 gk20a_idle(dev); 801 gk20a_idle(dev);
802} 802}
803 803
@@ -876,9 +876,9 @@ __acquires(&cde_app->mutex)
876 break; 876 break;
877 877
878 /* exhausted, retry */ 878 /* exhausted, retry */
879 mutex_unlock(&cde_app->mutex); 879 nvgpu_mutex_release(&cde_app->mutex);
880 cond_resched(); 880 cond_resched();
881 mutex_lock(&cde_app->mutex); 881 nvgpu_mutex_acquire(&cde_app->mutex);
882 } while (!nvgpu_timeout_expired(&timeout)); 882 } while (!nvgpu_timeout_expired(&timeout));
883 883
884 return cde_ctx; 884 return cde_ctx;
@@ -946,7 +946,7 @@ __releases(&cde_app->mutex)
946 scatterbuffer_byte_offset < compbits_byte_offset) 946 scatterbuffer_byte_offset < compbits_byte_offset)
947 return -EINVAL; 947 return -EINVAL;
948 948
949 mutex_lock(&g->cde_app.mutex); 949 nvgpu_mutex_acquire(&g->cde_app.mutex);
950 950
951 cde_ctx = gk20a_cde_get_context(g); 951 cde_ctx = gk20a_cde_get_context(g);
952 if (IS_ERR(cde_ctx)) { 952 if (IS_ERR(cde_ctx)) {
@@ -1118,7 +1118,7 @@ exit_unlock:
1118 if (surface) 1118 if (surface)
1119 dma_buf_vunmap(compbits_scatter_buf, surface); 1119 dma_buf_vunmap(compbits_scatter_buf, surface);
1120 1120
1121 mutex_unlock(&g->cde_app.mutex); 1121 nvgpu_mutex_release(&g->cde_app.mutex);
1122 return err; 1122 return err;
1123} 1123}
1124 1124
@@ -1155,13 +1155,13 @@ __releases(&cde_app->mutex)
1155 "cde: channel had timed out" 1155 "cde: channel had timed out"
1156 ", reloading"); 1156 ", reloading");
1157 /* mark it to be deleted, replace with a new one */ 1157 /* mark it to be deleted, replace with a new one */
1158 mutex_lock(&cde_app->mutex); 1158 nvgpu_mutex_acquire(&cde_app->mutex);
1159 cde_ctx->is_temporary = true; 1159 cde_ctx->is_temporary = true;
1160 if (gk20a_cde_create_context(g)) { 1160 if (gk20a_cde_create_context(g)) {
1161 gk20a_err(cde_ctx->dev, 1161 gk20a_err(cde_ctx->dev,
1162 "cde: can't replace context"); 1162 "cde: can't replace context");
1163 } 1163 }
1164 mutex_unlock(&cde_app->mutex); 1164 nvgpu_mutex_release(&cde_app->mutex);
1165 } 1165 }
1166 } 1166 }
1167 1167
@@ -1274,7 +1274,7 @@ __releases(&cde_app->mutex)
1274 if (err) 1274 if (err)
1275 return err; 1275 return err;
1276 1276
1277 mutex_lock(&cde_app->mutex); 1277 nvgpu_mutex_acquire(&cde_app->mutex);
1278 1278
1279 gk20a_cde_stop(g); 1279 gk20a_cde_stop(g);
1280 1280
@@ -1282,7 +1282,7 @@ __releases(&cde_app->mutex)
1282 if (!err) 1282 if (!err)
1283 cde_app->initialised = true; 1283 cde_app->initialised = true;
1284 1284
1285 mutex_unlock(&cde_app->mutex); 1285 nvgpu_mutex_release(&cde_app->mutex);
1286 1286
1287 gk20a_idle(g->dev); 1287 gk20a_idle(g->dev);
1288 return err; 1288 return err;
@@ -1300,8 +1300,8 @@ __releases(&cde_app->mutex)
1300 1300
1301 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); 1301 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1302 1302
1303 mutex_init(&cde_app->mutex); 1303 nvgpu_mutex_init(&cde_app->mutex);
1304 mutex_lock(&cde_app->mutex); 1304 nvgpu_mutex_acquire(&cde_app->mutex);
1305 1305
1306 INIT_LIST_HEAD(&cde_app->free_contexts); 1306 INIT_LIST_HEAD(&cde_app->free_contexts);
1307 INIT_LIST_HEAD(&cde_app->used_contexts); 1307 INIT_LIST_HEAD(&cde_app->used_contexts);
@@ -1313,7 +1313,7 @@ __releases(&cde_app->mutex)
1313 if (!err) 1313 if (!err)
1314 cde_app->initialised = true; 1314 cde_app->initialised = true;
1315 1315
1316 mutex_unlock(&cde_app->mutex); 1316 nvgpu_mutex_release(&cde_app->mutex);
1317 gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err); 1317 gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1318 return err; 1318 return err;
1319} 1319}
@@ -1561,7 +1561,7 @@ int gk20a_prepare_compressible_read(
1561 1561
1562 missing_bits = (state->valid_compbits ^ request) & request; 1562 missing_bits = (state->valid_compbits ^ request) & request;
1563 1563
1564 mutex_lock(&state->lock); 1564 nvgpu_mutex_acquire(&state->lock);
1565 1565
1566 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { 1566 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1567 1567
@@ -1599,7 +1599,7 @@ int gk20a_prepare_compressible_read(
1599 *zbc_color = state->zbc_color; 1599 *zbc_color = state->zbc_color;
1600 1600
1601out: 1601out:
1602 mutex_unlock(&state->lock); 1602 nvgpu_mutex_release(&state->lock);
1603 dma_buf_put(dmabuf); 1603 dma_buf_put(dmabuf);
1604 return err; 1604 return err;
1605} 1605}
@@ -1624,7 +1624,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1624 return err; 1624 return err;
1625 } 1625 }
1626 1626
1627 mutex_lock(&state->lock); 1627 nvgpu_mutex_acquire(&state->lock);
1628 1628
1629 /* Update the compbits state. */ 1629 /* Update the compbits state. */
1630 state->valid_compbits = valid_compbits; 1630 state->valid_compbits = valid_compbits;
@@ -1634,7 +1634,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1634 gk20a_fence_put(state->fence); 1634 gk20a_fence_put(state->fence);
1635 state->fence = NULL; 1635 state->fence = NULL;
1636 1636
1637 mutex_unlock(&state->lock); 1637 nvgpu_mutex_release(&state->lock);
1638 dma_buf_put(dmabuf); 1638 dma_buf_put(dmabuf);
1639 return 0; 1639 return 0;
1640} 1640}
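
Two details in cde_gk20a.c are worth calling out: the app mutex is dropped and re-acquired around cancel_delayed_work_sync() so the work item (which takes the same mutex) can finish, and gk20a_cde_get_context() releases the lock, calls cond_resched(), and re-acquires it while polling for a free context. A stripped-down sketch of the drop/reacquire idiom, with hypothetical names; the nvgpu_mutex and cancel_delayed_work_sync() calls are the ones used above.

```c
#include <linux/types.h>
#include <linux/workqueue.h>
#include <nvgpu/lock.h>

struct demo_ctx {
	struct nvgpu_mutex mutex;		/* also taken by the delayed work */
	struct delayed_work deleter_work;
};

/* caller holds ctx->mutex; mirrors gk20a_cde_cancel_deleter() */
static void demo_cancel_deleter(struct demo_ctx *ctx, bool wait_finish)
{
	if (wait_finish) {
		/* drop the lock so the work handler can take it and complete */
		nvgpu_mutex_release(&ctx->mutex);
		cancel_delayed_work_sync(&ctx->deleter_work);
		nvgpu_mutex_acquire(&ctx->mutex);
	} else {
		cancel_delayed_work(&ctx->deleter_work);
	}
}
```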
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
index 8cdba938..1136b0ad 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A color decompression engine support 2 * GK20A color decompression engine support
3 * 3 *
4 * Copyright (c) 2014-2016, NVIDIA Corporation. All rights reserved. 4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -262,7 +262,7 @@ struct gk20a_cde_ctx {
262 262
263struct gk20a_cde_app { 263struct gk20a_cde_app {
264 bool initialised; 264 bool initialised;
265 struct mutex mutex; 265 struct nvgpu_mutex mutex;
266 266
267 struct list_head free_contexts; 267 struct list_head free_contexts;
268 struct list_head used_contexts; 268 struct list_head used_contexts;
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 023c959e..fd248313 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -107,7 +107,7 @@ static void gk20a_ce_notify_all_user(struct gk20a *g, u32 event)
107 if (!ce_app->initialised) 107 if (!ce_app->initialised)
108 return; 108 return;
109 109
110 mutex_lock(&ce_app->app_mutex); 110 nvgpu_mutex_acquire(&ce_app->app_mutex);
111 111
112 list_for_each_entry_safe(ce_ctx, ce_ctx_save, 112 list_for_each_entry_safe(ce_ctx, ce_ctx_save,
113 &ce_app->allocated_contexts, list) { 113 &ce_app->allocated_contexts, list) {
@@ -117,7 +117,7 @@ static void gk20a_ce_notify_all_user(struct gk20a *g, u32 event)
117 } 117 }
118 } 118 }
119 119
120 mutex_unlock(&ce_app->app_mutex); 120 nvgpu_mutex_release(&ce_app->app_mutex);
121} 121}
122 122
123static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data) 123static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data)
@@ -183,14 +183,14 @@ static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_c
183 } 183 }
184} 184}
185 185
186/* assume this api should need to call under mutex_lock(&ce_app->app_mutex) */ 186/* assume this api should need to call under nvgpu_mutex_acquire(&ce_app->app_mutex) */
187static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx) 187static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
188{ 188{
189 struct list_head *list = &ce_ctx->list; 189 struct list_head *list = &ce_ctx->list;
190 190
191 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED; 191 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;
192 192
193 mutex_lock(&ce_ctx->gpu_ctx_mutex); 193 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
194 194
195 if (ce_ctx->cmd_buf_mem.cpu_va) { 195 if (ce_ctx->cmd_buf_mem.cpu_va) {
196 gk20a_ce_free_command_buffer_stored_fence(ce_ctx); 196 gk20a_ce_free_command_buffer_stored_fence(ce_ctx);
@@ -205,8 +205,8 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
205 if (list->prev && list->next) 205 if (list->prev && list->next)
206 list_del(list); 206 list_del(list);
207 207
208 mutex_unlock(&ce_ctx->gpu_ctx_mutex); 208 nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
209 mutex_destroy(&ce_ctx->gpu_ctx_mutex); 209 nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex);
210 210
211 kfree(ce_ctx); 211 kfree(ce_ctx);
212} 212}
@@ -353,8 +353,8 @@ int gk20a_init_ce_support(struct gk20a *g)
353 353
354 gk20a_dbg(gpu_dbg_fn, "ce: init"); 354 gk20a_dbg(gpu_dbg_fn, "ce: init");
355 355
356 mutex_init(&ce_app->app_mutex); 356 nvgpu_mutex_init(&ce_app->app_mutex);
357 mutex_lock(&ce_app->app_mutex); 357 nvgpu_mutex_acquire(&ce_app->app_mutex);
358 358
359 INIT_LIST_HEAD(&ce_app->allocated_contexts); 359 INIT_LIST_HEAD(&ce_app->allocated_contexts);
360 ce_app->ctx_count = 0; 360 ce_app->ctx_count = 0;
@@ -362,7 +362,7 @@ int gk20a_init_ce_support(struct gk20a *g)
362 ce_app->initialised = true; 362 ce_app->initialised = true;
363 ce_app->app_state = NVGPU_CE_ACTIVE; 363 ce_app->app_state = NVGPU_CE_ACTIVE;
364 364
365 mutex_unlock(&ce_app->app_mutex); 365 nvgpu_mutex_release(&ce_app->app_mutex);
366 gk20a_dbg(gpu_dbg_cde_ctx, "ce: init finished"); 366 gk20a_dbg(gpu_dbg_cde_ctx, "ce: init finished");
367 367
368 return 0; 368 return 0;
@@ -379,7 +379,7 @@ void gk20a_ce_destroy(struct gk20a *g)
379 ce_app->app_state = NVGPU_CE_SUSPEND; 379 ce_app->app_state = NVGPU_CE_SUSPEND;
380 ce_app->initialised = false; 380 ce_app->initialised = false;
381 381
382 mutex_lock(&ce_app->app_mutex); 382 nvgpu_mutex_acquire(&ce_app->app_mutex);
383 383
384 list_for_each_entry_safe(ce_ctx, ce_ctx_save, 384 list_for_each_entry_safe(ce_ctx, ce_ctx_save,
385 &ce_app->allocated_contexts, list) { 385 &ce_app->allocated_contexts, list) {
@@ -390,8 +390,8 @@ void gk20a_ce_destroy(struct gk20a *g)
390 ce_app->ctx_count = 0; 390 ce_app->ctx_count = 0;
391 ce_app->next_ctx_id = 0; 391 ce_app->next_ctx_id = 0;
392 392
393 mutex_unlock(&ce_app->app_mutex); 393 nvgpu_mutex_release(&ce_app->app_mutex);
394 mutex_destroy(&ce_app->app_mutex); 394 nvgpu_mutex_destroy(&ce_app->app_mutex);
395} 395}
396 396
397void gk20a_ce_suspend(struct gk20a *g) 397void gk20a_ce_suspend(struct gk20a *g)
@@ -428,7 +428,7 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev,
428 if (!ce_ctx) 428 if (!ce_ctx)
429 return ctx_id; 429 return ctx_id;
430 430
431 mutex_init(&ce_ctx->gpu_ctx_mutex); 431 nvgpu_mutex_init(&ce_ctx->gpu_ctx_mutex);
432 432
433 ce_ctx->g = g; 433 ce_ctx->g = g;
434 ce_ctx->dev = g->dev; 434 ce_ctx->dev = g->dev;
@@ -508,20 +508,20 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev,
508 } 508 }
509 } 509 }
510 510
511 mutex_lock(&ce_app->app_mutex); 511 nvgpu_mutex_acquire(&ce_app->app_mutex);
512 ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id; 512 ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
513 list_add(&ce_ctx->list, &ce_app->allocated_contexts); 513 list_add(&ce_ctx->list, &ce_app->allocated_contexts);
514 ++ce_app->next_ctx_id; 514 ++ce_app->next_ctx_id;
515 ++ce_app->ctx_count; 515 ++ce_app->ctx_count;
516 mutex_unlock(&ce_app->app_mutex); 516 nvgpu_mutex_release(&ce_app->app_mutex);
517 517
518 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED; 518 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;
519 519
520end: 520end:
521 if (ctx_id == (u32)~0) { 521 if (ctx_id == (u32)~0) {
522 mutex_lock(&ce_app->app_mutex); 522 nvgpu_mutex_acquire(&ce_app->app_mutex);
523 gk20a_ce_delete_gpu_context(ce_ctx); 523 gk20a_ce_delete_gpu_context(ce_ctx);
524 mutex_unlock(&ce_app->app_mutex); 524 nvgpu_mutex_release(&ce_app->app_mutex);
525 } 525 }
526 return ctx_id; 526 return ctx_id;
527 527
@@ -558,7 +558,7 @@ int gk20a_ce_execute_ops(struct device *dev,
558 if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) 558 if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
559 goto end; 559 goto end;
560 560
561 mutex_lock(&ce_app->app_mutex); 561 nvgpu_mutex_acquire(&ce_app->app_mutex);
562 562
563 list_for_each_entry_safe(ce_ctx, ce_ctx_save, 563 list_for_each_entry_safe(ce_ctx, ce_ctx_save,
564 &ce_app->allocated_contexts, list) { 564 &ce_app->allocated_contexts, list) {
@@ -568,7 +568,7 @@ int gk20a_ce_execute_ops(struct device *dev,
568 } 568 }
569 } 569 }
570 570
571 mutex_unlock(&ce_app->app_mutex); 571 nvgpu_mutex_release(&ce_app->app_mutex);
572 572
573 if (!found) { 573 if (!found) {
574 ret = -EINVAL; 574 ret = -EINVAL;
@@ -580,7 +580,7 @@ int gk20a_ce_execute_ops(struct device *dev,
580 goto end; 580 goto end;
581 } 581 }
582 582
583 mutex_lock(&ce_ctx->gpu_ctx_mutex); 583 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
584 584
585 ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset; 585 ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;
586 586
@@ -672,7 +672,7 @@ int gk20a_ce_execute_ops(struct device *dev,
672 } else 672 } else
673 ret = -ENOMEM; 673 ret = -ENOMEM;
674noop: 674noop:
675 mutex_unlock(&ce_ctx->gpu_ctx_mutex); 675 nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
676end: 676end:
677 return ret; 677 return ret;
678} 678}
@@ -688,7 +688,7 @@ void gk20a_ce_delete_context(struct device *dev,
688 if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) 688 if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
689 return; 689 return;
690 690
691 mutex_lock(&ce_app->app_mutex); 691 nvgpu_mutex_acquire(&ce_app->app_mutex);
692 692
693 list_for_each_entry_safe(ce_ctx, ce_ctx_save, 693 list_for_each_entry_safe(ce_ctx, ce_ctx_save,
694 &ce_app->allocated_contexts, list) { 694 &ce_app->allocated_contexts, list) {
@@ -699,7 +699,7 @@ void gk20a_ce_delete_context(struct device *dev,
699 } 699 }
700 } 700 }
701 701
702 mutex_unlock(&ce_app->app_mutex); 702 nvgpu_mutex_release(&ce_app->app_mutex);
703 return; 703 return;
704} 704}
705EXPORT_SYMBOL(gk20a_ce_delete_context); 705EXPORT_SYMBOL(gk20a_ce_delete_context);
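
ce2_gk20a.c also shows the full lifetime of a converted lock: nvgpu_mutex_init() in gk20a_init_ce_support(), acquire/release around every list walk, and an explicit nvgpu_mutex_destroy() in gk20a_ce_destroy() and gk20a_ce_delete_gpu_context() once the lock can no longer be contended. A minimal lifecycle sketch under the same assumptions as before (hypothetical names, API names from the patch):

```c
#include <linux/list.h>
#include <linux/types.h>
#include <nvgpu/lock.h>

struct demo_app {
	struct nvgpu_mutex app_mutex;
	struct list_head contexts;
	bool initialised;
};

static void demo_app_init(struct demo_app *app)
{
	nvgpu_mutex_init(&app->app_mutex);

	nvgpu_mutex_acquire(&app->app_mutex);
	INIT_LIST_HEAD(&app->contexts);
	app->initialised = true;
	nvgpu_mutex_release(&app->app_mutex);
}

static void demo_app_destroy(struct demo_app *app)
{
	nvgpu_mutex_acquire(&app->app_mutex);
	app->initialised = false;
	nvgpu_mutex_release(&app->app_mutex);

	/* destroy only after no other path can still take the lock */
	nvgpu_mutex_destroy(&app->app_mutex);
}
```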
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index 3b53834d..1bb25dd1 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A graphics copy engine (gr host) 4 * GK20A graphics copy engine (gr host)
5 * 5 *
6 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -88,7 +88,7 @@ enum {
88/* global ce app db */ 88/* global ce app db */
89struct gk20a_ce_app { 89struct gk20a_ce_app {
90 bool initialised; 90 bool initialised;
91 struct mutex app_mutex; 91 struct nvgpu_mutex app_mutex;
92 int app_state; 92 int app_state;
93 93
94 struct list_head allocated_contexts; 94 struct list_head allocated_contexts;
@@ -101,7 +101,7 @@ struct gk20a_gpu_ctx {
101 struct gk20a *g; 101 struct gk20a *g;
102 struct device *dev; 102 struct device *dev;
103 u32 ctx_id; 103 u32 ctx_id;
104 struct mutex gpu_ctx_mutex; 104 struct nvgpu_mutex gpu_ctx_mutex;
105 int gpu_ctx_state; 105 int gpu_ctx_state;
106 ce_event_callback user_event_callback; 106 ce_event_callback user_event_callback;
107 107
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 376a64b0..83a3a523 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -102,7 +102,7 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
102 102
103 platform = gk20a_get_platform(f->g->dev); 103 platform = gk20a_get_platform(f->g->dev);
104 104
105 mutex_lock(&f->free_chs_mutex); 105 nvgpu_mutex_acquire(&f->free_chs_mutex);
106 if (!list_empty(&f->free_chs)) { 106 if (!list_empty(&f->free_chs)) {
107 ch = list_first_entry(&f->free_chs, struct channel_gk20a, 107 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
108 free_chs); 108 free_chs);
@@ -111,7 +111,7 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
111 WARN_ON(ch->referenceable); 111 WARN_ON(ch->referenceable);
112 f->used_channels++; 112 f->used_channels++;
113 } 113 }
114 mutex_unlock(&f->free_chs_mutex); 114 nvgpu_mutex_release(&f->free_chs_mutex);
115 115
116 if (platform->aggressive_sync_destroy_thresh && 116 if (platform->aggressive_sync_destroy_thresh &&
117 (f->used_channels > 117 (f->used_channels >
@@ -128,11 +128,11 @@ static void free_channel(struct fifo_gk20a *f,
128 128
129 trace_gk20a_release_used_channel(ch->hw_chid); 129 trace_gk20a_release_used_channel(ch->hw_chid);
130 /* refcount is zero here and channel is in a freed/dead state */ 130 /* refcount is zero here and channel is in a freed/dead state */
131 mutex_lock(&f->free_chs_mutex); 131 nvgpu_mutex_acquire(&f->free_chs_mutex);
132 /* add to head to increase visibility of timing-related bugs */ 132 /* add to head to increase visibility of timing-related bugs */
133 list_add(&ch->free_chs, &f->free_chs); 133 list_add(&ch->free_chs, &f->free_chs);
134 f->used_channels--; 134 f->used_channels--;
135 mutex_unlock(&f->free_chs_mutex); 135 nvgpu_mutex_release(&f->free_chs_mutex);
136 136
137 if (platform->aggressive_sync_destroy_thresh && 137 if (platform->aggressive_sync_destroy_thresh &&
138 (f->used_channels < 138 (f->used_channels <
@@ -494,10 +494,10 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
494 gk20a_channel_cancel_job_clean_up(ch, true); 494 gk20a_channel_cancel_job_clean_up(ch, true);
495 495
496 /* ensure no fences are pending */ 496 /* ensure no fences are pending */
497 mutex_lock(&ch->sync_lock); 497 nvgpu_mutex_acquire(&ch->sync_lock);
498 if (ch->sync) 498 if (ch->sync)
499 ch->sync->set_min_eq_max(ch->sync); 499 ch->sync->set_min_eq_max(ch->sync);
500 mutex_unlock(&ch->sync_lock); 500 nvgpu_mutex_release(&ch->sync_lock);
501 501
502 /* release all job semaphores (applies only to jobs that use 502 /* release all job semaphores (applies only to jobs that use
503 semaphore synchronization) */ 503 semaphore synchronization) */
@@ -595,7 +595,7 @@ void gk20a_disable_channel(struct channel_gk20a *ch)
595static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch) 595static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
596{ 596{
597 /* disable existing cyclestats buffer */ 597 /* disable existing cyclestats buffer */
598 mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex); 598 nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
599 if (ch->cyclestate.cyclestate_buffer_handler) { 599 if (ch->cyclestate.cyclestate_buffer_handler) {
600 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler, 600 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
601 ch->cyclestate.cyclestate_buffer); 601 ch->cyclestate.cyclestate_buffer);
@@ -604,7 +604,7 @@ static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
604 ch->cyclestate.cyclestate_buffer = NULL; 604 ch->cyclestate.cyclestate_buffer = NULL;
605 ch->cyclestate.cyclestate_buffer_size = 0; 605 ch->cyclestate.cyclestate_buffer_size = 0;
606 } 606 }
607 mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex); 607 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
608} 608}
609 609
610static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, 610static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
@@ -654,12 +654,12 @@ static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
654{ 654{
655 int ret; 655 int ret;
656 656
657 mutex_lock(&ch->cs_client_mutex); 657 nvgpu_mutex_acquire(&ch->cs_client_mutex);
658 if (ch->cs_client) 658 if (ch->cs_client)
659 ret = gr_gk20a_css_flush(ch, ch->cs_client); 659 ret = gr_gk20a_css_flush(ch, ch->cs_client);
660 else 660 else
661 ret = -EBADF; 661 ret = -EBADF;
662 mutex_unlock(&ch->cs_client_mutex); 662 nvgpu_mutex_release(&ch->cs_client_mutex);
663 663
664 return ret; 664 return ret;
665} 665}
@@ -671,7 +671,7 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
671{ 671{
672 int ret; 672 int ret;
673 673
674 mutex_lock(&ch->cs_client_mutex); 674 nvgpu_mutex_acquire(&ch->cs_client_mutex);
675 if (ch->cs_client) { 675 if (ch->cs_client) {
676 ret = -EEXIST; 676 ret = -EEXIST;
677 } else { 677 } else {
@@ -681,7 +681,7 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
681 perfmon_id_start, 681 perfmon_id_start,
682 &ch->cs_client); 682 &ch->cs_client);
683 } 683 }
684 mutex_unlock(&ch->cs_client_mutex); 684 nvgpu_mutex_release(&ch->cs_client_mutex);
685 685
686 return ret; 686 return ret;
687} 687}
@@ -690,14 +690,14 @@ static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
690{ 690{
691 int ret; 691 int ret;
692 692
693 mutex_lock(&ch->cs_client_mutex); 693 nvgpu_mutex_acquire(&ch->cs_client_mutex);
694 if (ch->cs_client) { 694 if (ch->cs_client) {
695 ret = gr_gk20a_css_detach(ch, ch->cs_client); 695 ret = gr_gk20a_css_detach(ch, ch->cs_client);
696 ch->cs_client = NULL; 696 ch->cs_client = NULL;
697 } else { 697 } else {
698 ret = 0; 698 ret = 0;
699 } 699 }
700 mutex_unlock(&ch->cs_client_mutex); 700 nvgpu_mutex_release(&ch->cs_client_mutex);
701 701
702 return ret; 702 return ret;
703} 703}
@@ -824,9 +824,9 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
824 memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification)); 824 memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
825 825
826 /* set channel notifiers pointer */ 826 /* set channel notifiers pointer */
827 mutex_lock(&ch->error_notifier_mutex); 827 nvgpu_mutex_acquire(&ch->error_notifier_mutex);
828 ch->error_notifier_ref = dmabuf; 828 ch->error_notifier_ref = dmabuf;
829 mutex_unlock(&ch->error_notifier_mutex); 829 nvgpu_mutex_release(&ch->error_notifier_mutex);
830 830
831 return 0; 831 return 0;
832} 832}
@@ -857,14 +857,14 @@ void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error)
857 857
858void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) 858void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
859{ 859{
860 mutex_lock(&ch->error_notifier_mutex); 860 nvgpu_mutex_acquire(&ch->error_notifier_mutex);
861 gk20a_set_error_notifier_locked(ch, error); 861 gk20a_set_error_notifier_locked(ch, error);
862 mutex_unlock(&ch->error_notifier_mutex); 862 nvgpu_mutex_release(&ch->error_notifier_mutex);
863} 863}
864 864
865static void gk20a_free_error_notifiers(struct channel_gk20a *ch) 865static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
866{ 866{
867 mutex_lock(&ch->error_notifier_mutex); 867 nvgpu_mutex_acquire(&ch->error_notifier_mutex);
868 if (ch->error_notifier_ref) { 868 if (ch->error_notifier_ref) {
869 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va); 869 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
870 dma_buf_put(ch->error_notifier_ref); 870 dma_buf_put(ch->error_notifier_ref);
@@ -872,7 +872,7 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
872 ch->error_notifier = NULL; 872 ch->error_notifier = NULL;
873 ch->error_notifier_va = NULL; 873 ch->error_notifier_va = NULL;
874 } 874 }
875 mutex_unlock(&ch->error_notifier_mutex); 875 nvgpu_mutex_release(&ch->error_notifier_mutex);
876} 876}
877 877
878static void gk20a_wait_until_counter_is_N( 878static void gk20a_wait_until_counter_is_N(
@@ -927,16 +927,16 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
927 nvgpu_wait_for_deferred_interrupts(g); 927 nvgpu_wait_for_deferred_interrupts(g);
928 928
929 /* prevent new refs */ 929 /* prevent new refs */
930 spin_lock(&ch->ref_obtain_lock); 930 nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
931 if (!ch->referenceable) { 931 if (!ch->referenceable) {
932 spin_unlock(&ch->ref_obtain_lock); 932 nvgpu_spinlock_release(&ch->ref_obtain_lock);
933 gk20a_err(dev_from_gk20a(ch->g), 933 gk20a_err(dev_from_gk20a(ch->g),
934 "Extra %s() called to channel %u", 934 "Extra %s() called to channel %u",
935 __func__, ch->hw_chid); 935 __func__, ch->hw_chid);
936 return; 936 return;
937 } 937 }
938 ch->referenceable = false; 938 ch->referenceable = false;
939 spin_unlock(&ch->ref_obtain_lock); 939 nvgpu_spinlock_release(&ch->ref_obtain_lock);
940 940
941 /* matches with the initial reference in gk20a_open_new_channel() */ 941 /* matches with the initial reference in gk20a_open_new_channel() */
942 atomic_dec(&ch->ref_count); 942 atomic_dec(&ch->ref_count);
@@ -948,18 +948,18 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
948 __func__, "references"); 948 __func__, "references");
949 949
950 /* if engine reset was deferred, perform it now */ 950 /* if engine reset was deferred, perform it now */
951 mutex_lock(&f->deferred_reset_mutex); 951 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
952 if (g->fifo.deferred_reset_pending) { 952 if (g->fifo.deferred_reset_pending) {
953 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" 953 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
954 " deferred, running now"); 954 " deferred, running now");
955 /* if lock is already taken, a reset is taking place 955 /* if lock is already taken, a reset is taking place
956 so no need to repeat */ 956 so no need to repeat */
957 if (mutex_trylock(&g->fifo.gr_reset_mutex)) { 957 if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) {
958 gk20a_fifo_deferred_reset(g, ch); 958 gk20a_fifo_deferred_reset(g, ch);
959 mutex_unlock(&g->fifo.gr_reset_mutex); 959 nvgpu_mutex_release(&g->fifo.gr_reset_mutex);
960 } 960 }
961 } 961 }
962 mutex_unlock(&f->deferred_reset_mutex); 962 nvgpu_mutex_release(&f->deferred_reset_mutex);
963 963
964 if (!gk20a_channel_as_bound(ch)) 964 if (!gk20a_channel_as_bound(ch))
965 goto unbind; 965 goto unbind;
@@ -991,12 +991,12 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
991 channel_gk20a_free_priv_cmdbuf(ch); 991 channel_gk20a_free_priv_cmdbuf(ch);
992 992
993 /* sync must be destroyed before releasing channel vm */ 993 /* sync must be destroyed before releasing channel vm */
994 mutex_lock(&ch->sync_lock); 994 nvgpu_mutex_acquire(&ch->sync_lock);
995 if (ch->sync) { 995 if (ch->sync) {
996 gk20a_channel_sync_destroy(ch->sync); 996 gk20a_channel_sync_destroy(ch->sync);
997 ch->sync = NULL; 997 ch->sync = NULL;
998 } 998 }
999 mutex_unlock(&ch->sync_lock); 999 nvgpu_mutex_release(&ch->sync_lock);
1000 1000
1001 /* 1001 /*
1002 * free the channel used semaphore index. 1002 * free the channel used semaphore index.
@@ -1011,10 +1011,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
1011 */ 1011 */
1012 gk20a_vm_put(ch_vm); 1012 gk20a_vm_put(ch_vm);
1013 1013
1014 spin_lock(&ch->update_fn_lock); 1014 nvgpu_spinlock_acquire(&ch->update_fn_lock);
1015 ch->update_fn = NULL; 1015 ch->update_fn = NULL;
1016 ch->update_fn_data = NULL; 1016 ch->update_fn_data = NULL;
1017 spin_unlock(&ch->update_fn_lock); 1017 nvgpu_spinlock_release(&ch->update_fn_lock);
1018 cancel_work_sync(&ch->update_fn_work); 1018 cancel_work_sync(&ch->update_fn_work);
1019 cancel_delayed_work_sync(&ch->clean_up.wq); 1019 cancel_delayed_work_sync(&ch->clean_up.wq);
1020 cancel_delayed_work_sync(&ch->timeout.wq); 1020 cancel_delayed_work_sync(&ch->timeout.wq);
@@ -1037,21 +1037,21 @@ unbind:
1037 WARN_ON(ch->sync); 1037 WARN_ON(ch->sync);
1038 1038
1039 /* unlink all debug sessions */ 1039 /* unlink all debug sessions */
1040 mutex_lock(&g->dbg_sessions_lock); 1040 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1041 1041
1042 list_for_each_entry_safe(session_data, tmp_s, 1042 list_for_each_entry_safe(session_data, tmp_s,
1043 &ch->dbg_s_list, dbg_s_entry) { 1043 &ch->dbg_s_list, dbg_s_entry) {
1044 dbg_s = session_data->dbg_s; 1044 dbg_s = session_data->dbg_s;
1045 mutex_lock(&dbg_s->ch_list_lock); 1045 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
1046 list_for_each_entry_safe(ch_data, tmp, 1046 list_for_each_entry_safe(ch_data, tmp,
1047 &dbg_s->ch_list, ch_entry) { 1047 &dbg_s->ch_list, ch_entry) {
1048 if (ch_data->chid == ch->hw_chid) 1048 if (ch_data->chid == ch->hw_chid)
1049 dbg_unbind_single_channel_gk20a(dbg_s, ch_data); 1049 dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
1050 } 1050 }
1051 mutex_unlock(&dbg_s->ch_list_lock); 1051 nvgpu_mutex_release(&dbg_s->ch_list_lock);
1052 } 1052 }
1053 1053
1054 mutex_unlock(&g->dbg_sessions_lock); 1054 nvgpu_mutex_release(&g->dbg_sessions_lock);
1055 1055
1056 /* free pre-allocated resources, if applicable */ 1056 /* free pre-allocated resources, if applicable */
1057 if (channel_gk20a_is_prealloc_enabled(ch)) 1057 if (channel_gk20a_is_prealloc_enabled(ch))
@@ -1079,7 +1079,7 @@ static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch)
1079 unsigned long prev_jiffies = 0; 1079 unsigned long prev_jiffies = 0;
1080 struct device *dev = dev_from_gk20a(ch->g); 1080 struct device *dev = dev_from_gk20a(ch->g);
1081 1081
1082 spin_lock(&ch->ref_actions_lock); 1082 nvgpu_spinlock_acquire(&ch->ref_actions_lock);
1083 1083
1084 dev_info(dev, "ch %d: refs %d. Actions, most recent last:\n", 1084 dev_info(dev, "ch %d: refs %d. Actions, most recent last:\n",
1085 ch->hw_chid, atomic_read(&ch->ref_count)); 1085 ch->hw_chid, atomic_read(&ch->ref_count));
@@ -1109,7 +1109,7 @@ static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch)
1109 get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; 1109 get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING;
1110 } 1110 }
1111 1111
1112 spin_unlock(&ch->ref_actions_lock); 1112 nvgpu_spinlock_release(&ch->ref_actions_lock);
1113#endif 1113#endif
1114} 1114}
1115 1115
@@ -1119,7 +1119,7 @@ static void gk20a_channel_save_ref_source(struct channel_gk20a *ch,
1119#if GK20A_CHANNEL_REFCOUNT_TRACKING 1119#if GK20A_CHANNEL_REFCOUNT_TRACKING
1120 struct channel_gk20a_ref_action *act; 1120 struct channel_gk20a_ref_action *act;
1121 1121
1122 spin_lock(&ch->ref_actions_lock); 1122 nvgpu_spinlock_acquire(&ch->ref_actions_lock);
1123 1123
1124 act = &ch->ref_actions[ch->ref_actions_put]; 1124 act = &ch->ref_actions[ch->ref_actions_put];
1125 act->type = type; 1125 act->type = type;
@@ -1132,7 +1132,7 @@ static void gk20a_channel_save_ref_source(struct channel_gk20a *ch,
1132 ch->ref_actions_put = (ch->ref_actions_put + 1) % 1132 ch->ref_actions_put = (ch->ref_actions_put + 1) %
1133 GK20A_CHANNEL_REFCOUNT_TRACKING; 1133 GK20A_CHANNEL_REFCOUNT_TRACKING;
1134 1134
1135 spin_unlock(&ch->ref_actions_lock); 1135 nvgpu_spinlock_release(&ch->ref_actions_lock);
1136#endif 1136#endif
1137} 1137}
1138 1138
@@ -1152,7 +1152,7 @@ struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
1152 const char *caller) { 1152 const char *caller) {
1153 struct channel_gk20a *ret; 1153 struct channel_gk20a *ret;
1154 1154
1155 spin_lock(&ch->ref_obtain_lock); 1155 nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
1156 1156
1157 if (likely(ch->referenceable)) { 1157 if (likely(ch->referenceable)) {
1158 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); 1158 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get);
@@ -1161,7 +1161,7 @@ struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
1161 } else 1161 } else
1162 ret = NULL; 1162 ret = NULL;
1163 1163
1164 spin_unlock(&ch->ref_obtain_lock); 1164 nvgpu_spinlock_release(&ch->ref_obtain_lock);
1165 1165
1166 if (ret) 1166 if (ret)
1167 trace_gk20a_channel_get(ch->hw_chid, caller); 1167 trace_gk20a_channel_get(ch->hw_chid, caller);
@@ -1250,10 +1250,10 @@ static void gk20a_channel_update_runcb_fn(struct work_struct *work)
1250 void (*update_fn)(struct channel_gk20a *, void *); 1250 void (*update_fn)(struct channel_gk20a *, void *);
1251 void *update_fn_data; 1251 void *update_fn_data;
1252 1252
1253 spin_lock(&ch->update_fn_lock); 1253 nvgpu_spinlock_acquire(&ch->update_fn_lock);
1254 update_fn = ch->update_fn; 1254 update_fn = ch->update_fn;
1255 update_fn_data = ch->update_fn_data; 1255 update_fn_data = ch->update_fn_data;
1256 spin_unlock(&ch->update_fn_lock); 1256 nvgpu_spinlock_release(&ch->update_fn_lock);
1257 1257
1258 if (update_fn) 1258 if (update_fn)
1259 update_fn(ch, update_fn_data); 1259 update_fn(ch, update_fn_data);
@@ -1268,10 +1268,10 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
1268 struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel); 1268 struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel);
1269 1269
1270 if (ch) { 1270 if (ch) {
1271 spin_lock(&ch->update_fn_lock); 1271 nvgpu_spinlock_acquire(&ch->update_fn_lock);
1272 ch->update_fn = update_fn; 1272 ch->update_fn = update_fn;
1273 ch->update_fn_data = update_fn_data; 1273 ch->update_fn_data = update_fn_data;
1274 spin_unlock(&ch->update_fn_lock); 1274 nvgpu_spinlock_release(&ch->update_fn_lock);
1275 } 1275 }
1276 1276
1277 return ch; 1277 return ch;
@@ -1325,13 +1325,13 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
1325 ch->tgid = current->tgid; /* process granularity for FECS traces */ 1325 ch->tgid = current->tgid; /* process granularity for FECS traces */
1326 1326
1327 /* unhook all events created on this channel */ 1327 /* unhook all events created on this channel */
1328 mutex_lock(&ch->event_id_list_lock); 1328 nvgpu_mutex_acquire(&ch->event_id_list_lock);
1329 list_for_each_entry_safe(event_id_data, event_id_data_temp, 1329 list_for_each_entry_safe(event_id_data, event_id_data_temp,
1330 &ch->event_id_list, 1330 &ch->event_id_list,
1331 event_id_node) { 1331 event_id_node) {
1332 list_del_init(&event_id_data->event_id_node); 1332 list_del_init(&event_id_data->event_id_node);
1333 } 1333 }
1334 mutex_unlock(&ch->event_id_list_lock); 1334 nvgpu_mutex_release(&ch->event_id_list_lock);
1335 1335
1336 /* By default, channel is regular (non-TSG) channel */ 1336 /* By default, channel is regular (non-TSG) channel */
1337 ch->tsgid = NVGPU_INVALID_TSG_ID; 1337 ch->tsgid = NVGPU_INVALID_TSG_ID;
@@ -1357,7 +1357,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
1357 1357
1358 ch->update_fn = NULL; 1358 ch->update_fn = NULL;
1359 ch->update_fn_data = NULL; 1359 ch->update_fn_data = NULL;
1360 spin_lock_init(&ch->update_fn_lock); 1360 nvgpu_spinlock_init(&ch->update_fn_lock);
1361 INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn); 1361 INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
1362 1362
1363 /* Mark the channel alive, get-able, with 1 initial use 1363 /* Mark the channel alive, get-able, with 1 initial use
@@ -1652,17 +1652,17 @@ static void channel_gk20a_free_job(struct channel_gk20a *c,
1652void channel_gk20a_joblist_lock(struct channel_gk20a *c) 1652void channel_gk20a_joblist_lock(struct channel_gk20a *c)
1653{ 1653{
1654 if (channel_gk20a_is_prealloc_enabled(c)) 1654 if (channel_gk20a_is_prealloc_enabled(c))
1655 mutex_lock(&c->joblist.pre_alloc.read_lock); 1655 nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
1656 else 1656 else
1657 spin_lock(&c->joblist.dynamic.lock); 1657 nvgpu_spinlock_acquire(&c->joblist.dynamic.lock);
1658} 1658}
1659 1659
1660void channel_gk20a_joblist_unlock(struct channel_gk20a *c) 1660void channel_gk20a_joblist_unlock(struct channel_gk20a *c)
1661{ 1661{
1662 if (channel_gk20a_is_prealloc_enabled(c)) 1662 if (channel_gk20a_is_prealloc_enabled(c))
1663 mutex_unlock(&c->joblist.pre_alloc.read_lock); 1663 nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
1664 else 1664 else
1665 spin_unlock(&c->joblist.dynamic.lock); 1665 nvgpu_spinlock_release(&c->joblist.dynamic.lock);
1666} 1666}
1667 1667
1668static struct channel_gk20a_job *channel_gk20a_joblist_peek( 1668static struct channel_gk20a_job *channel_gk20a_joblist_peek(
@@ -1871,14 +1871,14 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1871 channel_gk20a_setup_userd(c); 1871 channel_gk20a_setup_userd(c);
1872 1872
1873 if (!platform->aggressive_sync_destroy_thresh) { 1873 if (!platform->aggressive_sync_destroy_thresh) {
1874 mutex_lock(&c->sync_lock); 1874 nvgpu_mutex_acquire(&c->sync_lock);
1875 c->sync = gk20a_channel_sync_create(c); 1875 c->sync = gk20a_channel_sync_create(c);
1876 if (!c->sync) { 1876 if (!c->sync) {
1877 err = -ENOMEM; 1877 err = -ENOMEM;
1878 mutex_unlock(&c->sync_lock); 1878 nvgpu_mutex_release(&c->sync_lock);
1879 goto clean_up_unmap; 1879 goto clean_up_unmap;
1880 } 1880 }
1881 mutex_unlock(&c->sync_lock); 1881 nvgpu_mutex_release(&c->sync_lock);
1882 1882
1883 if (g->ops.fifo.resetup_ramfc) { 1883 if (g->ops.fifo.resetup_ramfc) {
1884 err = g->ops.fifo.resetup_ramfc(c); 1884 err = g->ops.fifo.resetup_ramfc(c);
@@ -2085,16 +2085,16 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
2085 if (!ch->wdt_enabled) 2085 if (!ch->wdt_enabled)
2086 return; 2086 return;
2087 2087
2088 raw_spin_lock(&ch->timeout.lock); 2088 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
2089 2089
2090 if (ch->timeout.initialized) { 2090 if (ch->timeout.initialized) {
2091 raw_spin_unlock(&ch->timeout.lock); 2091 nvgpu_raw_spinlock_release(&ch->timeout.lock);
2092 return; 2092 return;
2093 } 2093 }
2094 2094
2095 ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch); 2095 ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
2096 ch->timeout.initialized = true; 2096 ch->timeout.initialized = true;
2097 raw_spin_unlock(&ch->timeout.lock); 2097 nvgpu_raw_spinlock_release(&ch->timeout.lock);
2098 2098
2099 schedule_delayed_work(&ch->timeout.wq, 2099 schedule_delayed_work(&ch->timeout.wq,
2100 msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch))); 2100 msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));
@@ -2102,18 +2102,18 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
2102 2102
2103static void gk20a_channel_timeout_stop(struct channel_gk20a *ch) 2103static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
2104{ 2104{
2105 raw_spin_lock(&ch->timeout.lock); 2105 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
2106 if (!ch->timeout.initialized) { 2106 if (!ch->timeout.initialized) {
2107 raw_spin_unlock(&ch->timeout.lock); 2107 nvgpu_raw_spinlock_release(&ch->timeout.lock);
2108 return; 2108 return;
2109 } 2109 }
2110 raw_spin_unlock(&ch->timeout.lock); 2110 nvgpu_raw_spinlock_release(&ch->timeout.lock);
2111 2111
2112 cancel_delayed_work_sync(&ch->timeout.wq); 2112 cancel_delayed_work_sync(&ch->timeout.wq);
2113 2113
2114 raw_spin_lock(&ch->timeout.lock); 2114 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
2115 ch->timeout.initialized = false; 2115 ch->timeout.initialized = false;
2116 raw_spin_unlock(&ch->timeout.lock); 2116 nvgpu_raw_spinlock_release(&ch->timeout.lock);
2117} 2117}
2118 2118
2119void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) 2119void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
@@ -2125,13 +2125,13 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
2125 struct channel_gk20a *ch = &f->channel[chid]; 2125 struct channel_gk20a *ch = &f->channel[chid];
2126 2126
2127 if (gk20a_channel_get(ch)) { 2127 if (gk20a_channel_get(ch)) {
2128 raw_spin_lock(&ch->timeout.lock); 2128 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
2129 if (!ch->timeout.initialized) { 2129 if (!ch->timeout.initialized) {
2130 raw_spin_unlock(&ch->timeout.lock); 2130 nvgpu_raw_spinlock_release(&ch->timeout.lock);
2131 gk20a_channel_put(ch); 2131 gk20a_channel_put(ch);
2132 continue; 2132 continue;
2133 } 2133 }
2134 raw_spin_unlock(&ch->timeout.lock); 2134 nvgpu_raw_spinlock_release(&ch->timeout.lock);
2135 2135
2136 cancel_delayed_work_sync(&ch->timeout.wq); 2136 cancel_delayed_work_sync(&ch->timeout.wq);
2137 if (!ch->has_timedout) 2137 if (!ch->has_timedout)
@@ -2164,13 +2164,13 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
2164 } 2164 }
2165 2165
2166 /* Need global lock since multiple channels can timeout at a time */ 2166 /* Need global lock since multiple channels can timeout at a time */
2167 mutex_lock(&g->ch_wdt_lock); 2167 nvgpu_mutex_acquire(&g->ch_wdt_lock);
2168 2168
2169 /* Get timed out job and reset the timer */ 2169 /* Get timed out job and reset the timer */
2170 raw_spin_lock(&ch->timeout.lock); 2170 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
2171 gp_get = ch->timeout.gp_get; 2171 gp_get = ch->timeout.gp_get;
2172 ch->timeout.initialized = false; 2172 ch->timeout.initialized = false;
2173 raw_spin_unlock(&ch->timeout.lock); 2173 nvgpu_raw_spinlock_release(&ch->timeout.lock);
2174 2174
2175 if (gk20a_userd_gp_get(ch->g, ch) != gp_get) { 2175 if (gk20a_userd_gp_get(ch->g, ch) != gp_get) {
2176 gk20a_channel_timeout_start(ch); 2176 gk20a_channel_timeout_start(ch);
@@ -2187,7 +2187,7 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
2187 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); 2187 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
2188 2188
2189fail_unlock: 2189fail_unlock:
2190 mutex_unlock(&g->ch_wdt_lock); 2190 nvgpu_mutex_release(&g->ch_wdt_lock);
2191 gk20a_channel_put(ch); 2191 gk20a_channel_put(ch);
2192 gk20a_idle(dev_from_gk20a(g)); 2192 gk20a_idle(dev_from_gk20a(g));
2193} 2193}
@@ -2216,17 +2216,17 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
2216 2216
2217static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c) 2217static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
2218{ 2218{
2219 mutex_lock(&c->clean_up.lock); 2219 nvgpu_mutex_acquire(&c->clean_up.lock);
2220 2220
2221 if (c->clean_up.scheduled) { 2221 if (c->clean_up.scheduled) {
2222 mutex_unlock(&c->clean_up.lock); 2222 nvgpu_mutex_release(&c->clean_up.lock);
2223 return; 2223 return;
2224 } 2224 }
2225 2225
2226 c->clean_up.scheduled = true; 2226 c->clean_up.scheduled = true;
2227 schedule_delayed_work(&c->clean_up.wq, 1); 2227 schedule_delayed_work(&c->clean_up.wq, 1);
2228 2228
2229 mutex_unlock(&c->clean_up.lock); 2229 nvgpu_mutex_release(&c->clean_up.lock);
2230} 2230}
2231 2231
2232static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c, 2232static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
@@ -2235,9 +2235,9 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
2235 if (wait_for_completion) 2235 if (wait_for_completion)
2236 cancel_delayed_work_sync(&c->clean_up.wq); 2236 cancel_delayed_work_sync(&c->clean_up.wq);
2237 2237
2238 mutex_lock(&c->clean_up.lock); 2238 nvgpu_mutex_acquire(&c->clean_up.lock);
2239 c->clean_up.scheduled = false; 2239 c->clean_up.scheduled = false;
2240 mutex_unlock(&c->clean_up.lock); 2240 nvgpu_mutex_release(&c->clean_up.lock);
2241} 2241}
2242 2242
2243static int gk20a_channel_add_job(struct channel_gk20a *c, 2243static int gk20a_channel_add_job(struct channel_gk20a *c,
@@ -2353,13 +2353,13 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
2353 c->sync->signal_timeline(c->sync); 2353 c->sync->signal_timeline(c->sync);
2354 2354
2355 if (platform->aggressive_sync_destroy_thresh) { 2355 if (platform->aggressive_sync_destroy_thresh) {
2356 mutex_lock(&c->sync_lock); 2356 nvgpu_mutex_acquire(&c->sync_lock);
2357 if (atomic_dec_and_test(&c->sync->refcount) && 2357 if (atomic_dec_and_test(&c->sync->refcount) &&
2358 platform->aggressive_sync_destroy) { 2358 platform->aggressive_sync_destroy) {
2359 gk20a_channel_sync_destroy(c->sync); 2359 gk20a_channel_sync_destroy(c->sync);
2360 c->sync = NULL; 2360 c->sync = NULL;
2361 } 2361 }
2362 mutex_unlock(&c->sync_lock); 2362 nvgpu_mutex_release(&c->sync_lock);
2363 } 2363 }
2364 } 2364 }
2365 2365
@@ -2563,18 +2563,18 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2563 need_sync_fence = true; 2563 need_sync_fence = true;
2564 2564
2565 if (platform->aggressive_sync_destroy_thresh) { 2565 if (platform->aggressive_sync_destroy_thresh) {
2566 mutex_lock(&c->sync_lock); 2566 nvgpu_mutex_acquire(&c->sync_lock);
2567 if (!c->sync) { 2567 if (!c->sync) {
2568 c->sync = gk20a_channel_sync_create(c); 2568 c->sync = gk20a_channel_sync_create(c);
2569 if (!c->sync) { 2569 if (!c->sync) {
2570 err = -ENOMEM; 2570 err = -ENOMEM;
2571 mutex_unlock(&c->sync_lock); 2571 nvgpu_mutex_release(&c->sync_lock);
2572 goto fail; 2572 goto fail;
2573 } 2573 }
2574 new_sync_created = true; 2574 new_sync_created = true;
2575 } 2575 }
2576 atomic_inc(&c->sync->refcount); 2576 atomic_inc(&c->sync->refcount);
2577 mutex_unlock(&c->sync_lock); 2577 nvgpu_mutex_release(&c->sync_lock);
2578 } 2578 }
2579 2579
2580 if (g->ops.fifo.resetup_ramfc && new_sync_created) { 2580 if (g->ops.fifo.resetup_ramfc && new_sync_created) {
@@ -2920,31 +2920,31 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
2920 c->g = NULL; 2920 c->g = NULL;
2921 c->hw_chid = chid; 2921 c->hw_chid = chid;
2922 atomic_set(&c->bound, false); 2922 atomic_set(&c->bound, false);
2923 spin_lock_init(&c->ref_obtain_lock); 2923 nvgpu_spinlock_init(&c->ref_obtain_lock);
2924 atomic_set(&c->ref_count, 0); 2924 atomic_set(&c->ref_count, 0);
2925 c->referenceable = false; 2925 c->referenceable = false;
2926 init_waitqueue_head(&c->ref_count_dec_wq); 2926 init_waitqueue_head(&c->ref_count_dec_wq);
2927#if GK20A_CHANNEL_REFCOUNT_TRACKING 2927#if GK20A_CHANNEL_REFCOUNT_TRACKING
2928 spin_lock_init(&c->ref_actions_lock); 2928 nvgpu_spinlock_init(&c->ref_actions_lock);
2929#endif 2929#endif
2930 mutex_init(&c->ioctl_lock); 2930 nvgpu_mutex_init(&c->ioctl_lock);
2931 mutex_init(&c->error_notifier_mutex); 2931 nvgpu_mutex_init(&c->error_notifier_mutex);
2932 spin_lock_init(&c->joblist.dynamic.lock); 2932 nvgpu_spinlock_init(&c->joblist.dynamic.lock);
2933 mutex_init(&c->joblist.pre_alloc.read_lock); 2933 nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
2934 raw_spin_lock_init(&c->timeout.lock); 2934 nvgpu_raw_spinlock_init(&c->timeout.lock);
2935 mutex_init(&c->sync_lock); 2935 nvgpu_mutex_init(&c->sync_lock);
2936 INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); 2936 INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
2937 INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn); 2937 INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
2938 mutex_init(&c->clean_up.lock); 2938 nvgpu_mutex_init(&c->clean_up.lock);
2939 INIT_LIST_HEAD(&c->joblist.dynamic.jobs); 2939 INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
2940#if defined(CONFIG_GK20A_CYCLE_STATS) 2940#if defined(CONFIG_GK20A_CYCLE_STATS)
2941 mutex_init(&c->cyclestate.cyclestate_buffer_mutex); 2941 nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
2942 mutex_init(&c->cs_client_mutex); 2942 nvgpu_mutex_init(&c->cs_client_mutex);
2943#endif 2943#endif
2944 INIT_LIST_HEAD(&c->dbg_s_list); 2944 INIT_LIST_HEAD(&c->dbg_s_list);
2945 INIT_LIST_HEAD(&c->event_id_list); 2945 INIT_LIST_HEAD(&c->event_id_list);
2946 mutex_init(&c->event_id_list_lock); 2946 nvgpu_mutex_init(&c->event_id_list_lock);
2947 mutex_init(&c->dbg_s_lock); 2947 nvgpu_mutex_init(&c->dbg_s_lock);
2948 list_add(&c->free_chs, &g->fifo.free_chs); 2948 list_add(&c->free_chs, &g->fifo.free_chs);
2949 2949
2950 return 0; 2950 return 0;
@@ -3102,7 +3102,7 @@ static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
3102 3102
3103 poll_wait(filep, &event_id_data->event_id_wq, wait); 3103 poll_wait(filep, &event_id_data->event_id_wq, wait);
3104 3104
3105 mutex_lock(&event_id_data->lock); 3105 nvgpu_mutex_acquire(&event_id_data->lock);
3106 3106
3107 if (event_id_data->is_tsg) { 3107 if (event_id_data->is_tsg) {
3108 struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; 3108 struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
@@ -3127,7 +3127,7 @@ static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
3127 } 3127 }
3128 } 3128 }
3129 3129
3130 mutex_unlock(&event_id_data->lock); 3130 nvgpu_mutex_release(&event_id_data->lock);
3131 3131
3132 return mask; 3132 return mask;
3133} 3133}
@@ -3140,15 +3140,15 @@ static int gk20a_event_id_release(struct inode *inode, struct file *filp)
3140 if (event_id_data->is_tsg) { 3140 if (event_id_data->is_tsg) {
3141 struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; 3141 struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
3142 3142
3143 mutex_lock(&tsg->event_id_list_lock); 3143 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
3144 list_del_init(&event_id_data->event_id_node); 3144 list_del_init(&event_id_data->event_id_node);
3145 mutex_unlock(&tsg->event_id_list_lock); 3145 nvgpu_mutex_release(&tsg->event_id_list_lock);
3146 } else { 3146 } else {
3147 struct channel_gk20a *ch = g->fifo.channel + event_id_data->id; 3147 struct channel_gk20a *ch = g->fifo.channel + event_id_data->id;
3148 3148
3149 mutex_lock(&ch->event_id_list_lock); 3149 nvgpu_mutex_acquire(&ch->event_id_list_lock);
3150 list_del_init(&event_id_data->event_id_node); 3150 list_del_init(&event_id_data->event_id_node);
3151 mutex_unlock(&ch->event_id_list_lock); 3151 nvgpu_mutex_release(&ch->event_id_list_lock);
3152 } 3152 }
3153 3153
3154 kfree(event_id_data); 3154 kfree(event_id_data);
@@ -3170,7 +3170,7 @@ static int gk20a_channel_get_event_data_from_id(struct channel_gk20a *ch,
3170 struct gk20a_event_id_data *local_event_id_data; 3170 struct gk20a_event_id_data *local_event_id_data;
3171 bool event_found = false; 3171 bool event_found = false;
3172 3172
3173 mutex_lock(&ch->event_id_list_lock); 3173 nvgpu_mutex_acquire(&ch->event_id_list_lock);
3174 list_for_each_entry(local_event_id_data, &ch->event_id_list, 3174 list_for_each_entry(local_event_id_data, &ch->event_id_list,
3175 event_id_node) { 3175 event_id_node) {
3176 if (local_event_id_data->event_id == event_id) { 3176 if (local_event_id_data->event_id == event_id) {
@@ -3178,7 +3178,7 @@ static int gk20a_channel_get_event_data_from_id(struct channel_gk20a *ch,
3178 break; 3178 break;
3179 } 3179 }
3180 } 3180 }
3181 mutex_unlock(&ch->event_id_list_lock); 3181 nvgpu_mutex_release(&ch->event_id_list_lock);
3182 3182
3183 if (event_found) { 3183 if (event_found) {
3184 *event_id_data = local_event_id_data; 3184 *event_id_data = local_event_id_data;
@@ -3199,7 +3199,7 @@ void gk20a_channel_event_id_post_event(struct channel_gk20a *ch,
3199 if (err) 3199 if (err)
3200 return; 3200 return;
3201 3201
3202 mutex_lock(&event_id_data->lock); 3202 nvgpu_mutex_acquire(&event_id_data->lock);
3203 3203
3204 gk20a_dbg_info( 3204 gk20a_dbg_info(
3205 "posting event for event_id=%d on ch=%d\n", 3205 "posting event for event_id=%d on ch=%d\n",
@@ -3208,7 +3208,7 @@ void gk20a_channel_event_id_post_event(struct channel_gk20a *ch,
3208 3208
3209 wake_up_interruptible_all(&event_id_data->event_id_wq); 3209 wake_up_interruptible_all(&event_id_data->event_id_wq);
3210 3210
3211 mutex_unlock(&event_id_data->lock); 3211 nvgpu_mutex_release(&event_id_data->lock);
3212} 3212}
3213 3213
3214static int gk20a_channel_event_id_enable(struct channel_gk20a *ch, 3214static int gk20a_channel_event_id_enable(struct channel_gk20a *ch,
@@ -3253,12 +3253,12 @@ static int gk20a_channel_event_id_enable(struct channel_gk20a *ch,
3253 event_id_data->event_id = event_id; 3253 event_id_data->event_id = event_id;
3254 3254
3255 init_waitqueue_head(&event_id_data->event_id_wq); 3255 init_waitqueue_head(&event_id_data->event_id_wq);
3256 mutex_init(&event_id_data->lock); 3256 nvgpu_mutex_init(&event_id_data->lock);
3257 INIT_LIST_HEAD(&event_id_data->event_id_node); 3257 INIT_LIST_HEAD(&event_id_data->event_id_node);
3258 3258
3259 mutex_lock(&ch->event_id_list_lock); 3259 nvgpu_mutex_acquire(&ch->event_id_list_lock);
3260 list_add_tail(&event_id_data->event_id_node, &ch->event_id_list); 3260 list_add_tail(&event_id_data->event_id_node, &ch->event_id_list);
3261 mutex_unlock(&ch->event_id_list_lock); 3261 nvgpu_mutex_release(&ch->event_id_list_lock);
3262 3262
3263 fd_install(local_fd, file); 3263 fd_install(local_fd, file);
3264 file->private_data = event_id_data; 3264 file->private_data = event_id_data;
@@ -3569,7 +3569,7 @@ long gk20a_channel_ioctl(struct file *filp,
3569 3569
3570 /* protect our sanity for threaded userspace - most of the channel is 3570 /* protect our sanity for threaded userspace - most of the channel is
3571 * not thread safe */ 3571 * not thread safe */
3572 mutex_lock(&ch->ioctl_lock); 3572 nvgpu_mutex_acquire(&ch->ioctl_lock);
3573 3573
3574 /* this ioctl call keeps a ref to the file which keeps a ref to the 3574 /* this ioctl call keeps a ref to the file which keeps a ref to the
3575 * channel */ 3575 * channel */
@@ -3660,12 +3660,12 @@ long gk20a_channel_ioctl(struct file *filp,
3660 3660
3661 /* waiting is thread-safe, not dropping this mutex could 3661 /* waiting is thread-safe, not dropping this mutex could
3662 * deadlock in certain conditions */ 3662 * deadlock in certain conditions */
3663 mutex_unlock(&ch->ioctl_lock); 3663 nvgpu_mutex_release(&ch->ioctl_lock);
3664 3664
3665 err = gk20a_channel_wait(ch, 3665 err = gk20a_channel_wait(ch,
3666 (struct nvgpu_wait_args *)buf); 3666 (struct nvgpu_wait_args *)buf);
3667 3667
3668 mutex_lock(&ch->ioctl_lock); 3668 nvgpu_mutex_acquire(&ch->ioctl_lock);
3669 3669
3670 gk20a_idle(dev); 3670 gk20a_idle(dev);
3671 break; 3671 break;
@@ -3899,7 +3899,7 @@ long gk20a_channel_ioctl(struct file *filp,
3899 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) 3899 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
3900 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); 3900 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
3901 3901
3902 mutex_unlock(&ch->ioctl_lock); 3902 nvgpu_mutex_release(&ch->ioctl_lock);
3903 3903
3904 gk20a_channel_put(ch); 3904 gk20a_channel_put(ch);
3905 3905
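The hunks above convert channel_gk20a.c wholesale from the Linux locking calls to the nvgpu wrappers: mutex_lock/mutex_unlock become nvgpu_mutex_acquire/nvgpu_mutex_release, spin_lock/spin_unlock become nvgpu_spinlock_acquire/nvgpu_spinlock_release, and the watchdog's raw spinlock becomes nvgpu_raw_spinlock_acquire/nvgpu_raw_spinlock_release. The sketch below is editorial and not part of the diff: one plausible shape for the <nvgpu/lock.h> wrappers, assuming they simply delegate to the Linux primitives on this platform.

#include <linux/mutex.h>
#include <linux/spinlock.h>

/* Editorial sketch: one way <nvgpu/lock.h> could wrap the Linux
 * primitives. The real header is not shown in this diff. */
struct nvgpu_mutex {
	struct mutex mutex;
};

struct nvgpu_spinlock {
	spinlock_t spinlock;
};

struct nvgpu_raw_spinlock {
	raw_spinlock_t spinlock;
};

static inline void nvgpu_mutex_acquire(struct nvgpu_mutex *mutex)
{
	mutex_lock(&mutex->mutex);	/* same semantics, driver-owned name */
}

static inline void nvgpu_mutex_release(struct nvgpu_mutex *mutex)
{
	mutex_unlock(&mutex->mutex);
}

static inline void nvgpu_spinlock_acquire(struct nvgpu_spinlock *spinlock)
{
	spin_lock(&spinlock->spinlock);
}

static inline void nvgpu_spinlock_release(struct nvgpu_spinlock *spinlock)
{
	spin_unlock(&spinlock->spinlock);
}

static inline void nvgpu_raw_spinlock_acquire(struct nvgpu_raw_spinlock *spinlock)
{
	raw_spin_lock(&spinlock->spinlock);
}

static inline void nvgpu_raw_spinlock_release(struct nvgpu_raw_spinlock *spinlock)
{
	raw_spin_unlock(&spinlock->spinlock);
}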
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index f940a271..14ee9f69 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -19,15 +19,15 @@
19#define CHANNEL_GK20A_H 19#define CHANNEL_GK20A_H
20 20
21#include <linux/log2.h> 21#include <linux/log2.h>
22#include <linux/mutex.h>
23#include <linux/poll.h> 22#include <linux/poll.h>
24#include <linux/semaphore.h> 23#include <linux/semaphore.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
26#include <linux/spinlock.h>
27#include <linux/stacktrace.h> 25#include <linux/stacktrace.h>
28#include <linux/wait.h> 26#include <linux/wait.h>
29#include <uapi/linux/nvgpu.h> 27#include <uapi/linux/nvgpu.h>
30 28
29#include <nvgpu/lock.h>
30
31struct gk20a; 31struct gk20a;
32struct gr_gk20a; 32struct gr_gk20a;
33struct dbg_session_gk20a; 33struct dbg_session_gk20a;
@@ -80,18 +80,18 @@ struct channel_gk20a_joblist {
80 unsigned int put; 80 unsigned int put;
81 unsigned int get; 81 unsigned int get;
82 struct channel_gk20a_job *jobs; 82 struct channel_gk20a_job *jobs;
83 struct mutex read_lock; 83 struct nvgpu_mutex read_lock;
84 } pre_alloc; 84 } pre_alloc;
85 85
86 struct { 86 struct {
87 struct list_head jobs; 87 struct list_head jobs;
88 spinlock_t lock; 88 struct nvgpu_spinlock lock;
89 } dynamic; 89 } dynamic;
90}; 90};
91 91
92struct channel_gk20a_timeout { 92struct channel_gk20a_timeout {
93 struct delayed_work wq; 93 struct delayed_work wq;
94 raw_spinlock_t lock; 94 struct nvgpu_raw_spinlock lock;
95 bool initialized; 95 bool initialized;
96 u32 gp_get; 96 u32 gp_get;
97}; 97};
@@ -106,12 +106,12 @@ struct gk20a_event_id_data {
106 bool event_posted; 106 bool event_posted;
107 107
108 wait_queue_head_t event_id_wq; 108 wait_queue_head_t event_id_wq;
109 struct mutex lock; 109 struct nvgpu_mutex lock;
110 struct list_head event_id_node; 110 struct list_head event_id_node;
111}; 111};
112 112
113struct channel_gk20a_clean_up { 113struct channel_gk20a_clean_up {
114 struct mutex lock; 114 struct nvgpu_mutex lock;
115 bool scheduled; 115 bool scheduled;
116 struct delayed_work wq; 116 struct delayed_work wq;
117}; 117};
@@ -156,7 +156,7 @@ struct channel_gk20a {
156 156
157 struct list_head free_chs; 157 struct list_head free_chs;
158 158
159 spinlock_t ref_obtain_lock; 159 struct nvgpu_spinlock ref_obtain_lock;
160 bool referenceable; 160 bool referenceable;
161 atomic_t ref_count; 161 atomic_t ref_count;
162 wait_queue_head_t ref_count_dec_wq; 162 wait_queue_head_t ref_count_dec_wq;
@@ -169,7 +169,7 @@ struct channel_gk20a {
169 struct channel_gk20a_ref_action ref_actions[ 169 struct channel_gk20a_ref_action ref_actions[
170 GK20A_CHANNEL_REFCOUNT_TRACKING]; 170 GK20A_CHANNEL_REFCOUNT_TRACKING];
171 size_t ref_actions_put; /* index of next write */ 171 size_t ref_actions_put; /* index of next write */
172 spinlock_t ref_actions_lock; 172 struct nvgpu_spinlock ref_actions_lock;
173#endif 173#endif
174 174
175 struct nvgpu_semaphore_int *hw_sema; 175 struct nvgpu_semaphore_int *hw_sema;
@@ -183,7 +183,7 @@ struct channel_gk20a {
183 bool cde; 183 bool cde;
184 pid_t pid; 184 pid_t pid;
185 pid_t tgid; 185 pid_t tgid;
186 struct mutex ioctl_lock; 186 struct nvgpu_mutex ioctl_lock;
187 187
188 int tsgid; 188 int tsgid;
189 struct list_head ch_entry; /* channel's entry in TSG */ 189 struct list_head ch_entry; /* channel's entry in TSG */
@@ -221,17 +221,17 @@ struct channel_gk20a {
221 void *cyclestate_buffer; 221 void *cyclestate_buffer;
222 u32 cyclestate_buffer_size; 222 u32 cyclestate_buffer_size;
223 struct dma_buf *cyclestate_buffer_handler; 223 struct dma_buf *cyclestate_buffer_handler;
224 struct mutex cyclestate_buffer_mutex; 224 struct nvgpu_mutex cyclestate_buffer_mutex;
225 } cyclestate; 225 } cyclestate;
226 226
227 struct mutex cs_client_mutex; 227 struct nvgpu_mutex cs_client_mutex;
228 struct gk20a_cs_snapshot_client *cs_client; 228 struct gk20a_cs_snapshot_client *cs_client;
229#endif 229#endif
230 struct mutex dbg_s_lock; 230 struct nvgpu_mutex dbg_s_lock;
231 struct list_head dbg_s_list; 231 struct list_head dbg_s_list;
232 232
233 struct list_head event_id_list; 233 struct list_head event_id_list;
234 struct mutex event_id_list_lock; 234 struct nvgpu_mutex event_id_list_lock;
235 235
236 bool has_timedout; 236 bool has_timedout;
237 u32 timeout_ms_max; 237 u32 timeout_ms_max;
@@ -241,9 +241,9 @@ struct channel_gk20a {
241 struct dma_buf *error_notifier_ref; 241 struct dma_buf *error_notifier_ref;
242 struct nvgpu_notification *error_notifier; 242 struct nvgpu_notification *error_notifier;
243 void *error_notifier_va; 243 void *error_notifier_va;
244 struct mutex error_notifier_mutex; 244 struct nvgpu_mutex error_notifier_mutex;
245 245
246 struct mutex sync_lock; 246 struct nvgpu_mutex sync_lock;
247 struct gk20a_channel_sync *sync; 247 struct gk20a_channel_sync *sync;
248 248
249#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION 249#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
@@ -254,7 +254,7 @@ struct channel_gk20a {
254 * via schedule_work */ 254 * via schedule_work */
255 void (*update_fn)(struct channel_gk20a *, void *); 255 void (*update_fn)(struct channel_gk20a *, void *);
256 void *update_fn_data; 256 void *update_fn_data;
257 spinlock_t update_fn_lock; /* make access to the two above atomic */ 257 struct nvgpu_spinlock update_fn_lock; /* make access to the two above atomic */
258 struct work_struct update_fn_work; 258 struct work_struct update_fn_work;
259 259
260 u32 interleave_level; 260 u32 interleave_level;
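The header change swaps the embedded lock types one-for-one (struct mutex to struct nvgpu_mutex, spinlock_t to struct nvgpu_spinlock, raw_spinlock_t to struct nvgpu_raw_spinlock) while keeping the locks embedded by value, so only the call sites change. A hypothetical example, not taken from the diff, of declaring and using the new types:

#include <nvgpu/lock.h>

/* Hypothetical example: embed the nvgpu lock types by value, exactly as
 * struct channel_gk20a now does, and pair every acquire with a release
 * around a short critical section. */
struct example_state {
	struct nvgpu_mutex ioctl_lock;		/* was: struct mutex */
	struct nvgpu_spinlock ref_lock;		/* was: spinlock_t   */
	int refs;
};

static void example_init(struct example_state *s)
{
	nvgpu_mutex_init(&s->ioctl_lock);
	nvgpu_spinlock_init(&s->ref_lock);
	s->refs = 0;
}

static void example_get(struct example_state *s)
{
	nvgpu_spinlock_acquire(&s->ref_lock);
	s->refs++;				/* tiny, non-sleeping section */
	nvgpu_spinlock_release(&s->ref_lock);
}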
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 18971b09..097635a7 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -414,9 +414,9 @@ struct wait_fence_work {
414static void gk20a_add_pending_sema_wait(struct gk20a *g, 414static void gk20a_add_pending_sema_wait(struct gk20a *g,
415 struct wait_fence_work *work) 415 struct wait_fence_work *work)
416{ 416{
417 raw_spin_lock(&g->pending_sema_waits_lock); 417 nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
418 list_add(&work->entry, &g->pending_sema_waits); 418 list_add(&work->entry, &g->pending_sema_waits);
419 raw_spin_unlock(&g->pending_sema_waits_lock); 419 nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
420} 420}
421 421
422/* 422/*
@@ -426,9 +426,9 @@ static void gk20a_add_pending_sema_wait(struct gk20a *g,
426static void gk20a_start_sema_wait_cancel(struct gk20a *g, 426static void gk20a_start_sema_wait_cancel(struct gk20a *g,
427 struct list_head *list) 427 struct list_head *list)
428{ 428{
429 raw_spin_lock(&g->pending_sema_waits_lock); 429 nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
430 list_replace_init(&g->pending_sema_waits, list); 430 list_replace_init(&g->pending_sema_waits, list);
431 raw_spin_unlock(&g->pending_sema_waits_lock); 431 nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
432} 432}
433 433
434/* 434/*
@@ -486,10 +486,10 @@ static void gk20a_channel_semaphore_launcher(
486 * This spinlock must protect a _very_ small critical section - 486 * This spinlock must protect a _very_ small critical section -
487 * otherwise it's possible that the deterministic submit path suffers. 487 * otherwise it's possible that the deterministic submit path suffers.
488 */ 488 */
489 raw_spin_lock(&g->pending_sema_waits_lock); 489 nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
490 if (!list_empty(&g->pending_sema_waits)) 490 if (!list_empty(&g->pending_sema_waits))
491 list_del_init(&w->entry); 491 list_del_init(&w->entry);
492 raw_spin_unlock(&g->pending_sema_waits_lock); 492 nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
493 493
494 gk20a_dbg_info("waiting for pre fence %p '%s'", 494 gk20a_dbg_info("waiting for pre fence %p '%s'",
495 fence, fence->name); 495 fence, fence->name);
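In channel_sync_gk20a.c the pending semaphore wait list keeps its raw spinlock, now taken through nvgpu_raw_spinlock_acquire/release, and the in-code comment's point stands: the critical section covers only the list manipulation. A simplified, illustrative sketch of that pattern, with invented names:

#include <linux/list.h>
#include <nvgpu/lock.h>

/* Simplified sketch of the pending-wait pattern above. The raw spinlock
 * covers only the list manipulation, never the work done for each entry,
 * keeping the deterministic submit path fast. */
struct pending_waits {
	struct nvgpu_raw_spinlock lock;
	struct list_head waits;
};

static void pending_waits_add(struct pending_waits *p, struct list_head *entry)
{
	nvgpu_raw_spinlock_acquire(&p->lock);
	list_add(entry, &p->waits);
	nvgpu_raw_spinlock_release(&p->lock);
}

static void pending_waits_take_all(struct pending_waits *p, struct list_head *out)
{
	/* Detach the whole list under the lock; process it afterwards. */
	nvgpu_raw_spinlock_acquire(&p->lock);
	list_replace_init(&p->waits, out);
	nvgpu_raw_spinlock_release(&p->lock);
}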
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c
index 32690c90..38d13b4b 100644
--- a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A Clocks 2 * GK20A Clocks
3 * 3 *
4 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -457,7 +457,7 @@ static int gk20a_init_clk_setup_sw(struct gk20a *g)
457 clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL]; 457 clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
458 } 458 }
459 459
460 mutex_init(&clk->clk_mutex); 460 nvgpu_mutex_init(&clk->clk_mutex);
461 461
462 clk->sw_ready = true; 462 clk->sw_ready = true;
463 463
@@ -538,14 +538,14 @@ static int gk20a_clk_export_set_rate(void *data, unsigned long *rate)
538 struct clk_gk20a *clk = &g->clk; 538 struct clk_gk20a *clk = &g->clk;
539 539
540 if (rate) { 540 if (rate) {
541 mutex_lock(&clk->clk_mutex); 541 nvgpu_mutex_acquire(&clk->clk_mutex);
542 old_freq = clk->gpc_pll.freq; 542 old_freq = clk->gpc_pll.freq;
543 ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq); 543 ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq);
544 if (!ret && clk->gpc_pll.enabled) 544 if (!ret && clk->gpc_pll.enabled)
545 ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq); 545 ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq);
546 if (!ret) 546 if (!ret)
547 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); 547 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
548 mutex_unlock(&clk->clk_mutex); 548 nvgpu_mutex_release(&clk->clk_mutex);
549 } 549 }
550 return ret; 550 return ret;
551} 551}
@@ -556,9 +556,9 @@ static int gk20a_clk_export_enable(void *data)
556 struct gk20a *g = data; 556 struct gk20a *g = data;
557 struct clk_gk20a *clk = &g->clk; 557 struct clk_gk20a *clk = &g->clk;
558 558
559 mutex_lock(&clk->clk_mutex); 559 nvgpu_mutex_acquire(&clk->clk_mutex);
560 ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); 560 ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
561 mutex_unlock(&clk->clk_mutex); 561 nvgpu_mutex_release(&clk->clk_mutex);
562 return ret; 562 return ret;
563} 563}
564 564
@@ -567,10 +567,10 @@ static void gk20a_clk_export_disable(void *data)
567 struct gk20a *g = data; 567 struct gk20a *g = data;
568 struct clk_gk20a *clk = &g->clk; 568 struct clk_gk20a *clk = &g->clk;
569 569
570 mutex_lock(&clk->clk_mutex); 570 nvgpu_mutex_acquire(&clk->clk_mutex);
571 if (g->clk.clk_hw_on) 571 if (g->clk.clk_hw_on)
572 clk_disable_gpcpll(g, 1); 572 clk_disable_gpcpll(g, 1);
573 mutex_unlock(&clk->clk_mutex); 573 nvgpu_mutex_release(&clk->clk_mutex);
574} 574}
575 575
576static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state) 576static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state)
@@ -578,12 +578,12 @@ static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state)
578 struct gk20a *g = data; 578 struct gk20a *g = data;
579 struct clk_gk20a *clk = &g->clk; 579 struct clk_gk20a *clk = &g->clk;
580 580
581 mutex_lock(&clk->clk_mutex); 581 nvgpu_mutex_acquire(&clk->clk_mutex);
582 if (state) 582 if (state)
583 *state = clk->gpc_pll.enabled; 583 *state = clk->gpc_pll.enabled;
584 if (rate) 584 if (rate)
585 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); 585 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
586 mutex_unlock(&clk->clk_mutex); 586 nvgpu_mutex_release(&clk->clk_mutex);
587} 587}
588 588
589static struct tegra_clk_export_ops gk20a_clk_export_ops = { 589static struct tegra_clk_export_ops gk20a_clk_export_ops = {
@@ -640,11 +640,11 @@ static int gk20a_init_clk_support(struct gk20a *g)
640 if (err) 640 if (err)
641 return err; 641 return err;
642 642
643 mutex_lock(&clk->clk_mutex); 643 nvgpu_mutex_acquire(&clk->clk_mutex);
644 clk->clk_hw_on = true; 644 clk->clk_hw_on = true;
645 645
646 err = gk20a_init_clk_setup_hw(g); 646 err = gk20a_init_clk_setup_hw(g);
647 mutex_unlock(&clk->clk_mutex); 647 nvgpu_mutex_release(&clk->clk_mutex);
648 if (err) 648 if (err)
649 return err; 649 return err;
650 650
@@ -658,9 +658,9 @@ static int gk20a_init_clk_support(struct gk20a *g)
658 return err; 658 return err;
659 659
660 /* The prev call may not enable PLL if gbus is unbalanced - force it */ 660 /* The prev call may not enable PLL if gbus is unbalanced - force it */
661 mutex_lock(&clk->clk_mutex); 661 nvgpu_mutex_acquire(&clk->clk_mutex);
662 err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); 662 err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
663 mutex_unlock(&clk->clk_mutex); 663 nvgpu_mutex_release(&clk->clk_mutex);
664 if (err) 664 if (err)
665 return err; 665 return err;
666 666
@@ -680,10 +680,10 @@ static int gk20a_suspend_clk_support(struct gk20a *g)
680 clk_disable(g->clk.tegra_clk); 680 clk_disable(g->clk.tegra_clk);
681 681
682 /* The prev call may not disable PLL if gbus is unbalanced - force it */ 682 /* The prev call may not disable PLL if gbus is unbalanced - force it */
683 mutex_lock(&g->clk.clk_mutex); 683 nvgpu_mutex_acquire(&g->clk.clk_mutex);
684 ret = clk_disable_gpcpll(g, 1); 684 ret = clk_disable_gpcpll(g, 1);
685 g->clk.clk_hw_on = false; 685 g->clk.clk_hw_on = false;
686 mutex_unlock(&g->clk.clk_mutex); 686 nvgpu_mutex_release(&g->clk.clk_mutex);
687 return ret; 687 return ret;
688} 688}
689 689
@@ -714,10 +714,10 @@ static int pll_reg_show(struct seq_file *s, void *data)
714 struct gk20a *g = s->private; 714 struct gk20a *g = s->private;
715 u32 reg, m, n, pl, f; 715 u32 reg, m, n, pl, f;
716 716
717 mutex_lock(&g->clk.clk_mutex); 717 nvgpu_mutex_acquire(&g->clk.clk_mutex);
718 if (!g->clk.clk_hw_on) { 718 if (!g->clk.clk_hw_on) {
719 seq_printf(s, "gk20a powered down - no access to registers\n"); 719 seq_printf(s, "gk20a powered down - no access to registers\n");
720 mutex_unlock(&g->clk.clk_mutex); 720 nvgpu_mutex_release(&g->clk.clk_mutex);
721 return 0; 721 return 0;
722 } 722 }
723 723
@@ -733,7 +733,7 @@ static int pll_reg_show(struct seq_file *s, void *data)
733 f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]); 733 f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]);
734 seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); 734 seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
735 seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); 735 seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
736 mutex_unlock(&g->clk.clk_mutex); 736 nvgpu_mutex_release(&g->clk.clk_mutex);
737 return 0; 737 return 0;
738} 738}
739 739
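clk_gk20a.c follows the same substitution around clk_mutex; the pll_reg_show() hunk also shows the recurring idiom of checking hardware state under the lock and releasing it on the early-return path. An illustrative sketch of that idiom, with invented names:

#include <linux/errno.h>
#include <linux/types.h>
#include <nvgpu/lock.h>

/* Illustrative only: check hardware state under the lock and release it
 * on every exit path, as pll_reg_show() does with clk_hw_on above. */
struct clk_unit {
	struct nvgpu_mutex lock;
	bool hw_on;
};

static int clk_unit_read_regs(struct clk_unit *u)
{
	nvgpu_mutex_acquire(&u->lock);
	if (!u->hw_on) {
		nvgpu_mutex_release(&u->lock);	/* early return still unlocks */
		return -ENODEV;
	}
	/* ... safe to touch clock registers while the unit stays powered ... */
	nvgpu_mutex_release(&u->lock);
	return 0;
}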
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.h b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h
index a45dfcb7..8260fd4a 100644
--- a/drivers/gpu/nvgpu/gk20a/clk_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2011 - 2016, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2011 - 2017, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -16,7 +16,7 @@
16#ifndef CLK_GK20A_H 16#ifndef CLK_GK20A_H
17#define CLK_GK20A_H 17#define CLK_GK20A_H
18 18
19#include <linux/mutex.h> 19#include <nvgpu/lock.h>
20#include <linux/clkdev.h> 20#include <linux/clkdev.h>
21#include <linux/clk-provider.h> 21#include <linux/clk-provider.h>
22 22
@@ -86,7 +86,7 @@ struct clk_gk20a {
86#endif 86#endif
87 struct pll gpc_pll; 87 struct pll gpc_pll;
88 struct pll gpc_pll_last; 88 struct pll gpc_pll_last;
89 struct mutex clk_mutex; 89 struct nvgpu_mutex clk_mutex;
90 struct namemap_cfg *clk_namemap; 90 struct namemap_cfg *clk_namemap;
91 u32 namemap_num; 91 u32 namemap_num;
92 u32 *namemap_xlat_table; 92 u32 *namemap_xlat_table;
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index aa92796c..4bc7ee52 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). 2 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
3 * 3 *
4 * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -19,7 +19,7 @@
19#include <linux/bitops.h> 19#include <linux/bitops.h>
20#include <linux/dma-mapping.h> 20#include <linux/dma-mapping.h>
21#include <linux/dma-buf.h> 21#include <linux/dma-buf.h>
22#include <linux/mutex.h> 22#include <nvgpu/lock.h>
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24 24
25#include "gk20a.h" 25#include "gk20a.h"
@@ -557,7 +557,7 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
557 gr = &g->gr; 557 gr = &g->gr;
558 *cs_client = NULL; 558 *cs_client = NULL;
559 559
560 mutex_lock(&gr->cs_lock); 560 nvgpu_mutex_acquire(&gr->cs_lock);
561 561
562 ret = css_gr_create_shared_data(gr); 562 ret = css_gr_create_shared_data(gr);
563 if (ret) 563 if (ret)
@@ -577,7 +577,7 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
577 if (perfmon_start) 577 if (perfmon_start)
578 *perfmon_start = (*cs_client)->perfmon_start; 578 *perfmon_start = (*cs_client)->perfmon_start;
579 579
580 mutex_unlock(&gr->cs_lock); 580 nvgpu_mutex_release(&gr->cs_lock);
581 581
582 return 0; 582 return 0;
583 583
@@ -591,7 +591,7 @@ failed:
591 if (list_empty(&gr->cs_data->clients)) 591 if (list_empty(&gr->cs_data->clients))
592 css_gr_free_shared_data(gr); 592 css_gr_free_shared_data(gr);
593 } 593 }
594 mutex_unlock(&gr->cs_lock); 594 nvgpu_mutex_release(&gr->cs_lock);
595 595
596 if (perfmon_start) 596 if (perfmon_start)
597 *perfmon_start = 0; 597 *perfmon_start = 0;
@@ -610,7 +610,7 @@ int gr_gk20a_css_detach(struct channel_gk20a *ch,
610 return -EINVAL; 610 return -EINVAL;
611 611
612 gr = &g->gr; 612 gr = &g->gr;
613 mutex_lock(&gr->cs_lock); 613 nvgpu_mutex_acquire(&gr->cs_lock);
614 if (gr->cs_data) { 614 if (gr->cs_data) {
615 struct gk20a_cs_snapshot *data = gr->cs_data; 615 struct gk20a_cs_snapshot *data = gr->cs_data;
616 616
@@ -623,7 +623,7 @@ int gr_gk20a_css_detach(struct channel_gk20a *ch,
623 } else { 623 } else {
624 ret = -EBADF; 624 ret = -EBADF;
625 } 625 }
626 mutex_unlock(&gr->cs_lock); 626 nvgpu_mutex_release(&gr->cs_lock);
627 627
628 return ret; 628 return ret;
629} 629}
@@ -639,9 +639,9 @@ int gr_gk20a_css_flush(struct channel_gk20a *ch,
639 return -EINVAL; 639 return -EINVAL;
640 640
641 gr = &g->gr; 641 gr = &g->gr;
642 mutex_lock(&gr->cs_lock); 642 nvgpu_mutex_acquire(&gr->cs_lock);
643 ret = css_gr_flush_snapshots(ch); 643 ret = css_gr_flush_snapshots(ch);
644 mutex_unlock(&gr->cs_lock); 644 nvgpu_mutex_release(&gr->cs_lock);
645 645
646 return ret; 646 return ret;
647} 647}
@@ -651,10 +651,10 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
651{ 651{
652 struct gr_gk20a *gr = &g->gr; 652 struct gr_gk20a *gr = &g->gr;
653 653
654 mutex_lock(&gr->cs_lock); 654 nvgpu_mutex_acquire(&gr->cs_lock);
655 css_gr_free_shared_data(gr); 655 css_gr_free_shared_data(gr);
656 mutex_unlock(&gr->cs_lock); 656 nvgpu_mutex_release(&gr->cs_lock);
657 mutex_destroy(&gr->cs_lock); 657 nvgpu_mutex_destroy(&gr->cs_lock);
658} 658}
659 659
660static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, 660static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
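css_gr_gk20a.c additionally pairs the conversion with nvgpu_mutex_destroy() once the snapshot data is freed. A small illustrative sketch of that init/teardown pairing, assuming nvgpu_mutex_destroy() is a thin wrapper over mutex_destroy():

#include <nvgpu/lock.h>

/* Illustrative sketch of the init/teardown pairing used for gr->cs_lock.
 * The lock is destroyed only after the data it protects is gone. */
struct snapshot_state {
	struct nvgpu_mutex lock;
	void *shared;
};

static void snapshot_state_init(struct snapshot_state *s)
{
	nvgpu_mutex_init(&s->lock);
	s->shared = NULL;
}

static void snapshot_state_teardown(struct snapshot_state *s)
{
	nvgpu_mutex_acquire(&s->lock);
	s->shared = NULL;			/* drop shared data under the lock */
	nvgpu_mutex_release(&s->lock);
	nvgpu_mutex_destroy(&s->lock);		/* no users may remain past here */
}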
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 5c9baf77..351be55e 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -349,7 +349,7 @@ static int nvgpu_gpu_ioctl_inval_icache(
349 ops.offset = gr_pri_gpc0_gcc_dbg_r(); 349 ops.offset = gr_pri_gpc0_gcc_dbg_r();
350 350
351 /* Take the global lock, since we'll be doing global regops */ 351 /* Take the global lock, since we'll be doing global regops */
352 mutex_lock(&g->dbg_sessions_lock); 352 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
353 353
354 err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); 354 err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
355 355
@@ -371,7 +371,7 @@ static int nvgpu_gpu_ioctl_inval_icache(
371 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl); 371 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
372 372
373end: 373end:
374 mutex_unlock(&g->dbg_sessions_lock); 374 nvgpu_mutex_release(&g->dbg_sessions_lock);
375 return err; 375 return err;
376} 376}
377 377
@@ -384,9 +384,9 @@ static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
384 return -EINVAL; 384 return -EINVAL;
385 } 385 }
386 386
387 mutex_lock(&g->dbg_sessions_lock); 387 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
388 g->ops.mm.set_debug_mode(g, args->state == 1); 388 g->ops.mm.set_debug_mode(g, args->state == 1);
389 mutex_unlock(&g->dbg_sessions_lock); 389 nvgpu_mutex_release(&g->dbg_sessions_lock);
390 390
391 gk20a_idle(g->dev); 391 gk20a_idle(g->dev);
392 return 0; 392 return 0;
@@ -403,13 +403,13 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
403 if (!ch) 403 if (!ch)
404 return -EINVAL; 404 return -EINVAL;
405 405
406 mutex_lock(&g->dbg_sessions_lock); 406 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
407 if (g->ops.gr.set_sm_debug_mode) 407 if (g->ops.gr.set_sm_debug_mode)
408 err = g->ops.gr.set_sm_debug_mode(g, ch, 408 err = g->ops.gr.set_sm_debug_mode(g, ch,
409 args->sms, !!args->enable); 409 args->sms, !!args->enable);
410 else 410 else
411 err = -ENOSYS; 411 err = -ENOSYS;
412 mutex_unlock(&g->dbg_sessions_lock); 412 nvgpu_mutex_release(&g->dbg_sessions_lock);
413 413
414 return err; 414 return err;
415} 415}
@@ -419,7 +419,7 @@ static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g)
419 int err = 0; 419 int err = 0;
420 u32 dbgr_control0; 420 u32 dbgr_control0;
421 421
422 mutex_lock(&g->dbg_sessions_lock); 422 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
423 /* assert stop trigger. uniformity assumption: all SMs will have 423 /* assert stop trigger. uniformity assumption: all SMs will have
424 * the same state in dbg_control0. */ 424 * the same state in dbg_control0. */
425 dbgr_control0 = 425 dbgr_control0 =
@@ -430,7 +430,7 @@ static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g)
430 gk20a_writel(g, 430 gk20a_writel(g,
431 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); 431 gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
432 432
433 mutex_unlock(&g->dbg_sessions_lock); 433 nvgpu_mutex_release(&g->dbg_sessions_lock);
434 return err; 434 return err;
435} 435}
436 436
@@ -456,7 +456,7 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
456 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() | 456 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
457 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(); 457 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
458 458
459 mutex_lock(&g->dbg_sessions_lock); 459 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
460 460
461 /* Lock down all SMs */ 461 /* Lock down all SMs */
462 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { 462 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
@@ -482,7 +482,7 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
482 } 482 }
483 483
484end: 484end:
485 mutex_unlock(&g->dbg_sessions_lock); 485 nvgpu_mutex_release(&g->dbg_sessions_lock);
486 kfree(w_state); 486 kfree(w_state);
487 return err; 487 return err;
488} 488}
@@ -491,7 +491,7 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g)
491{ 491{
492 int err = 0; 492 int err = 0;
493 493
494 mutex_lock(&g->dbg_sessions_lock); 494 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
495 495
496 /* Clear the pause mask to tell the GPU we want to resume everyone */ 496 /* Clear the pause mask to tell the GPU we want to resume everyone */
497 gk20a_writel(g, 497 gk20a_writel(g,
@@ -505,7 +505,7 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g)
505 * then a 1 to the run trigger */ 505 * then a 1 to the run trigger */
506 gk20a_resume_all_sms(g); 506 gk20a_resume_all_sms(g);
507 507
508 mutex_unlock(&g->dbg_sessions_lock); 508 nvgpu_mutex_release(&g->dbg_sessions_lock);
509 return err; 509 return err;
510} 510}
511 511
@@ -551,7 +551,7 @@ static int nvgpu_gpu_ioctl_has_any_exception(
551 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 551 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
552 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); 552 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
553 553
554 mutex_lock(&g->dbg_sessions_lock); 554 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
555 555
556 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { 556 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
557 557
@@ -565,7 +565,7 @@ static int nvgpu_gpu_ioctl_has_any_exception(
565 tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id; 565 tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
566 } 566 }
567 567
568 mutex_unlock(&g->dbg_sessions_lock); 568 nvgpu_mutex_release(&g->dbg_sessions_lock);
569 args->tpc_exception_en_sm_mask = tpc_exception_en; 569 args->tpc_exception_en_sm_mask = tpc_exception_en;
570 return err; 570 return err;
571} 571}
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 705eccaa..ffd15a37 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -47,7 +47,7 @@ struct gk20a_ctxsw_dev {
47 47
48 atomic_t vma_ref; 48 atomic_t vma_ref;
49 49
50 struct mutex write_lock; 50 struct nvgpu_mutex write_lock;
51}; 51};
52 52
53 53
@@ -83,16 +83,16 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
83 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, 83 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
84 "filp=%p buf=%p size=%zu", filp, buf, size); 84 "filp=%p buf=%p size=%zu", filp, buf, size);
85 85
86 mutex_lock(&dev->write_lock); 86 nvgpu_mutex_acquire(&dev->write_lock);
87 while (ring_is_empty(hdr)) { 87 while (ring_is_empty(hdr)) {
88 mutex_unlock(&dev->write_lock); 88 nvgpu_mutex_release(&dev->write_lock);
89 if (filp->f_flags & O_NONBLOCK) 89 if (filp->f_flags & O_NONBLOCK)
90 return -EAGAIN; 90 return -EAGAIN;
91 err = wait_event_interruptible(dev->readout_wq, 91 err = wait_event_interruptible(dev->readout_wq,
92 !ring_is_empty(hdr)); 92 !ring_is_empty(hdr));
93 if (err) 93 if (err)
94 return err; 94 return err;
95 mutex_lock(&dev->write_lock); 95 nvgpu_mutex_acquire(&dev->write_lock);
96 } 96 }
97 97
98 while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { 98 while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
@@ -101,7 +101,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
101 101
102 if (copy_to_user(entry, &dev->ents[hdr->read_idx], 102 if (copy_to_user(entry, &dev->ents[hdr->read_idx],
103 sizeof(*entry))) { 103 sizeof(*entry))) {
104 mutex_unlock(&dev->write_lock); 104 nvgpu_mutex_release(&dev->write_lock);
105 return -EFAULT; 105 return -EFAULT;
106 } 106 }
107 107
@@ -118,7 +118,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
118 hdr->read_idx); 118 hdr->read_idx);
119 119
120 *off = hdr->read_idx; 120 *off = hdr->read_idx;
121 mutex_unlock(&dev->write_lock); 121 nvgpu_mutex_release(&dev->write_lock);
122 122
123 return copied; 123 return copied;
124} 124}
@@ -126,9 +126,9 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
126static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) 126static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
127{ 127{
128 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); 128 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
129 mutex_lock(&dev->write_lock); 129 nvgpu_mutex_acquire(&dev->write_lock);
130 dev->write_enabled = true; 130 dev->write_enabled = true;
131 mutex_unlock(&dev->write_lock); 131 nvgpu_mutex_release(&dev->write_lock);
132 dev->g->ops.fecs_trace.enable(dev->g); 132 dev->g->ops.fecs_trace.enable(dev->g);
133 return 0; 133 return 0;
134} 134}
@@ -137,9 +137,9 @@ static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
137{ 137{
138 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); 138 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
139 dev->g->ops.fecs_trace.disable(dev->g); 139 dev->g->ops.fecs_trace.disable(dev->g);
140 mutex_lock(&dev->write_lock); 140 nvgpu_mutex_acquire(&dev->write_lock);
141 dev->write_enabled = false; 141 dev->write_enabled = false;
142 mutex_unlock(&dev->write_lock); 142 nvgpu_mutex_release(&dev->write_lock);
143 return 0; 143 return 0;
144} 144}
145 145
@@ -211,9 +211,9 @@ static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
211 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) 211 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
212 return -EINVAL; 212 return -EINVAL;
213 213
214 mutex_lock(&dev->write_lock); 214 nvgpu_mutex_acquire(&dev->write_lock);
215 ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); 215 ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
216 mutex_unlock(&dev->write_lock); 216 nvgpu_mutex_release(&dev->write_lock);
217 217
218 return ret; 218 return ret;
219} 219}
@@ -223,9 +223,9 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
223{ 223{
224 struct gk20a *g = dev->g; 224 struct gk20a *g = dev->g;
225 225
226 mutex_lock(&dev->write_lock); 226 nvgpu_mutex_acquire(&dev->write_lock);
227 dev->filter = args->filter; 227 dev->filter = args->filter;
228 mutex_unlock(&dev->write_lock); 228 nvgpu_mutex_release(&dev->write_lock);
229 229
230 if (g->ops.fecs_trace.set_filter) 230 if (g->ops.fecs_trace.set_filter)
231 g->ops.fecs_trace.set_filter(g, &dev->filter); 231 g->ops.fecs_trace.set_filter(g, &dev->filter);
@@ -235,9 +235,9 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
235static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, 235static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
236 struct nvgpu_ctxsw_trace_filter_args *args) 236 struct nvgpu_ctxsw_trace_filter_args *args)
237{ 237{
238 mutex_lock(&dev->write_lock); 238 nvgpu_mutex_acquire(&dev->write_lock);
239 args->filter = dev->filter; 239 args->filter = dev->filter;
240 mutex_unlock(&dev->write_lock); 240 nvgpu_mutex_release(&dev->write_lock);
241 241
242 return 0; 242 return 0;
243} 243}
@@ -293,7 +293,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
293 293
294 /* Allow only one user for this device */ 294 /* Allow only one user for this device */
295 dev = &trace->devs[vmid]; 295 dev = &trace->devs[vmid];
296 mutex_lock(&dev->write_lock); 296 nvgpu_mutex_acquire(&dev->write_lock);
297 if (dev->hdr) { 297 if (dev->hdr) {
298 err = -EBUSY; 298 err = -EBUSY;
299 goto done; 299 goto done;
@@ -321,7 +321,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
321 } 321 }
322 322
323done: 323done:
324 mutex_unlock(&dev->write_lock); 324 nvgpu_mutex_release(&dev->write_lock);
325 325
326idle: 326idle:
327 gk20a_idle(g->dev); 327 gk20a_idle(g->dev);
@@ -338,9 +338,9 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
338 338
339 g->ops.fecs_trace.disable(g); 339 g->ops.fecs_trace.disable(g);
340 340
341 mutex_lock(&dev->write_lock); 341 nvgpu_mutex_acquire(&dev->write_lock);
342 dev->write_enabled = false; 342 dev->write_enabled = false;
343 mutex_unlock(&dev->write_lock); 343 nvgpu_mutex_release(&dev->write_lock);
344 344
345 if (dev->hdr) { 345 if (dev->hdr) {
346 dev->g->ops.fecs_trace.free_user_buffer(dev->g); 346 dev->g->ops.fecs_trace.free_user_buffer(dev->g);
@@ -414,11 +414,11 @@ unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
414 414
415 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); 415 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
416 416
417 mutex_lock(&dev->write_lock); 417 nvgpu_mutex_acquire(&dev->write_lock);
418 poll_wait(filp, &dev->readout_wq, wait); 418 poll_wait(filp, &dev->readout_wq, wait);
419 if (!ring_is_empty(hdr)) 419 if (!ring_is_empty(hdr))
420 mask |= POLLIN | POLLRDNORM; 420 mask |= POLLIN | POLLRDNORM;
421 mutex_unlock(&dev->write_lock); 421 nvgpu_mutex_release(&dev->write_lock);
422 422
423 return mask; 423 return mask;
424} 424}
@@ -482,7 +482,7 @@ static int gk20a_ctxsw_init_devs(struct gk20a *g)
482 dev->hdr = NULL; 482 dev->hdr = NULL;
483 dev->write_enabled = false; 483 dev->write_enabled = false;
484 init_waitqueue_head(&dev->readout_wq); 484 init_waitqueue_head(&dev->readout_wq);
485 mutex_init(&dev->write_lock); 485 nvgpu_mutex_init(&dev->write_lock);
486 atomic_set(&dev->vma_ref, 0); 486 atomic_set(&dev->vma_ref, 0);
487 dev++; 487 dev++;
488 } 488 }
@@ -567,7 +567,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g,
567 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, 567 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
568 "dev=%p hdr=%p", dev, hdr); 568 "dev=%p hdr=%p", dev, hdr);
569 569
570 mutex_lock(&dev->write_lock); 570 nvgpu_mutex_acquire(&dev->write_lock);
571 571
572 if (unlikely(!hdr)) { 572 if (unlikely(!hdr)) {
573 /* device has been released */ 573 /* device has been released */
@@ -621,7 +621,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g,
621 gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", 621 gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
622 hdr->read_idx, hdr->write_idx, ring_len(hdr)); 622 hdr->read_idx, hdr->write_idx, ring_len(hdr));
623 623
624 mutex_unlock(&dev->write_lock); 624 nvgpu_mutex_release(&dev->write_lock);
625 return ret; 625 return ret;
626 626
627disable: 627disable:
@@ -638,7 +638,7 @@ filter:
638 entry->tag, entry->timestamp, reason); 638 entry->tag, entry->timestamp, reason);
639 639
640done: 640done:
641 mutex_unlock(&dev->write_lock); 641 nvgpu_mutex_release(&dev->write_lock);
642 return ret; 642 return ret;
643} 643}
644 644
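The gk20a_ctxsw_dev_read() hunk keeps the existing discipline of never sleeping with write_lock held: the mutex is released before wait_event_interruptible() and reacquired after waking, before the ring is rechecked. A self-contained sketch of that loop, with an invented ring_has_data() callback standing in for the real ring_is_empty(hdr) test:

#include <linux/types.h>
#include <linux/wait.h>
#include <nvgpu/lock.h>

/* Sketch of the drop-lock-before-sleeping loop used by the read path. */
static int wait_for_ring_data(struct nvgpu_mutex *lock,
			      wait_queue_head_t *wq,
			      bool (*ring_has_data)(void *data), void *data)
{
	int err;

	nvgpu_mutex_acquire(lock);
	while (!ring_has_data(data)) {
		nvgpu_mutex_release(lock);	/* never sleep holding the lock */
		err = wait_event_interruptible(*wq, ring_has_data(data));
		if (err)
			return err;		/* interrupted; lock already dropped */
		nvgpu_mutex_acquire(lock);	/* re-take before re-checking */
	}
	/* Returns with the lock held and data known to be available. */
	return 0;
}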
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index ac11e378..f6290e1d 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -44,9 +44,9 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
44 struct channel_gk20a *ch; 44 struct channel_gk20a *ch;
45 struct gk20a *g = dbg_s->g; 45 struct gk20a *g = dbg_s->g;
46 46
47 mutex_lock(&dbg_s->ch_list_lock); 47 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
48 if (list_empty(&dbg_s->ch_list)) { 48 if (list_empty(&dbg_s->ch_list)) {
49 mutex_unlock(&dbg_s->ch_list_lock); 49 nvgpu_mutex_release(&dbg_s->ch_list_lock);
50 return NULL; 50 return NULL;
51 } 51 }
52 52
@@ -55,7 +55,7 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
55 ch_entry); 55 ch_entry);
56 ch = g->fifo.channel + ch_data->chid; 56 ch = g->fifo.channel + ch_data->chid;
57 57
58 mutex_unlock(&dbg_s->ch_list_lock); 58 nvgpu_mutex_release(&dbg_s->ch_list_lock);
59 59
60 return ch; 60 return ch;
61} 61}
@@ -116,8 +116,8 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
116 116
117 init_waitqueue_head(&dbg_session->dbg_events.wait_queue); 117 init_waitqueue_head(&dbg_session->dbg_events.wait_queue);
118 INIT_LIST_HEAD(&dbg_session->ch_list); 118 INIT_LIST_HEAD(&dbg_session->ch_list);
119 mutex_init(&dbg_session->ch_list_lock); 119 nvgpu_mutex_init(&dbg_session->ch_list_lock);
120 mutex_init(&dbg_session->ioctl_lock); 120 nvgpu_mutex_init(&dbg_session->ioctl_lock);
121 dbg_session->dbg_events.events_enabled = false; 121 dbg_session->dbg_events.events_enabled = false;
122 dbg_session->dbg_events.num_pending_events = 0; 122 dbg_session->dbg_events.num_pending_events = 0;
123 123
@@ -127,61 +127,61 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
127/* used in scenarios where the debugger session can take just the inter-session 127/* used in scenarios where the debugger session can take just the inter-session
128 * lock for performance, but the profiler session must take the per-gpu lock 128 * lock for performance, but the profiler session must take the per-gpu lock
129 * since it might not have an associated channel. */ 129 * since it might not have an associated channel. */
130static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s) 130static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s)
131{ 131{
132 struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); 132 struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
133 133
134 if (dbg_s->is_profiler || !ch) 134 if (dbg_s->is_profiler || !ch)
135 mutex_lock(&dbg_s->g->dbg_sessions_lock); 135 nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock);
136 else 136 else
137 mutex_lock(&ch->dbg_s_lock); 137 nvgpu_mutex_acquire(&ch->dbg_s_lock);
138} 138}
139 139
140static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s) 140static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s)
141{ 141{
142 struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); 142 struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
143 143
144 if (dbg_s->is_profiler || !ch) 144 if (dbg_s->is_profiler || !ch)
145 mutex_unlock(&dbg_s->g->dbg_sessions_lock); 145 nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock);
146 else 146 else
147 mutex_unlock(&ch->dbg_s_lock); 147 nvgpu_mutex_release(&ch->dbg_s_lock);
148} 148}
149 149
150static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) 150static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
151{ 151{
152 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); 152 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
153 153
154 gk20a_dbg_session_mutex_lock(dbg_s); 154 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
155 155
156 dbg_s->dbg_events.events_enabled = true; 156 dbg_s->dbg_events.events_enabled = true;
157 dbg_s->dbg_events.num_pending_events = 0; 157 dbg_s->dbg_events.num_pending_events = 0;
158 158
159 gk20a_dbg_session_mutex_unlock(dbg_s); 159 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
160} 160}
161 161
162static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) 162static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
163{ 163{
164 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); 164 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
165 165
166 gk20a_dbg_session_mutex_lock(dbg_s); 166 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
167 167
168 dbg_s->dbg_events.events_enabled = false; 168 dbg_s->dbg_events.events_enabled = false;
169 dbg_s->dbg_events.num_pending_events = 0; 169 dbg_s->dbg_events.num_pending_events = 0;
170 170
171 gk20a_dbg_session_mutex_unlock(dbg_s); 171 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
172} 172}
173 173
174static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) 174static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
175{ 175{
176 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); 176 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
177 177
178 gk20a_dbg_session_mutex_lock(dbg_s); 178 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
179 179
180 if (dbg_s->dbg_events.events_enabled && 180 if (dbg_s->dbg_events.events_enabled &&
181 dbg_s->dbg_events.num_pending_events > 0) 181 dbg_s->dbg_events.num_pending_events > 0)
182 dbg_s->dbg_events.num_pending_events--; 182 dbg_s->dbg_events.num_pending_events--;
183 183
184 gk20a_dbg_session_mutex_unlock(dbg_s); 184 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
185} 185}
186 186
187static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, 187static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
@@ -232,7 +232,7 @@ unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
232 232
233 poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait); 233 poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait);
234 234
235 gk20a_dbg_session_mutex_lock(dbg_s); 235 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
236 236
237 if (dbg_s->dbg_events.events_enabled && 237 if (dbg_s->dbg_events.events_enabled &&
238 dbg_s->dbg_events.num_pending_events > 0) { 238 dbg_s->dbg_events.num_pending_events > 0) {
@@ -243,7 +243,7 @@ unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
243 mask = (POLLPRI | POLLIN); 243 mask = (POLLPRI | POLLIN);
244 } 244 }
245 245
246 gk20a_dbg_session_mutex_unlock(dbg_s); 246 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
247 247
248 return mask; 248 return mask;
249} 249}
@@ -268,7 +268,7 @@ void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
268 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); 268 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
269 269
270 /* guard against the session list being modified */ 270 /* guard against the session list being modified */
271 mutex_lock(&ch->dbg_s_lock); 271 nvgpu_mutex_acquire(&ch->dbg_s_lock);
272 272
273 list_for_each_entry(session_data, &ch->dbg_s_list, dbg_s_entry) { 273 list_for_each_entry(session_data, &ch->dbg_s_list, dbg_s_entry) {
274 dbg_s = session_data->dbg_s; 274 dbg_s = session_data->dbg_s;
@@ -284,7 +284,7 @@ void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
284 } 284 }
285 } 285 }
286 286
287 mutex_unlock(&ch->dbg_s_lock); 287 nvgpu_mutex_release(&ch->dbg_s_lock);
288} 288}
289 289
290bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch) 290bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch)
@@ -296,7 +296,7 @@ bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch)
296 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); 296 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
297 297
298 /* guard against the session list being modified */ 298 /* guard against the session list being modified */
299 mutex_lock(&ch->dbg_s_lock); 299 nvgpu_mutex_acquire(&ch->dbg_s_lock);
300 300
301 list_for_each_entry(session_data, &ch->dbg_s_list, dbg_s_entry) { 301 list_for_each_entry(session_data, &ch->dbg_s_list, dbg_s_entry) {
302 dbg_s = session_data->dbg_s; 302 dbg_s = session_data->dbg_s;
@@ -308,7 +308,7 @@ bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch)
308 } 308 }
309 } 309 }
310 310
311 mutex_unlock(&ch->dbg_s_lock); 311 nvgpu_mutex_release(&ch->dbg_s_lock);
312 312
313 return broadcast; 313 return broadcast;
314} 314}
@@ -321,7 +321,7 @@ int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch)
321 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); 321 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
322 322
323 /* guard against the session list being modified */ 323 /* guard against the session list being modified */
324 mutex_lock(&ch->dbg_s_lock); 324 nvgpu_mutex_acquire(&ch->dbg_s_lock);
325 325
326 list_for_each_entry(session_data, &ch->dbg_s_list, dbg_s_entry) { 326 list_for_each_entry(session_data, &ch->dbg_s_list, dbg_s_entry) {
327 dbg_s = session_data->dbg_s; 327 dbg_s = session_data->dbg_s;
@@ -332,7 +332,7 @@ int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch)
332 } 332 }
333 } 333 }
334 334
335 mutex_unlock(&ch->dbg_s_lock); 335 nvgpu_mutex_release(&ch->dbg_s_lock);
336 336
337 return 0; 337 return 0;
338} 338}
@@ -407,12 +407,12 @@ static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s)
407 struct dbg_session_channel_data *ch_data, *tmp; 407 struct dbg_session_channel_data *ch_data, *tmp;
408 struct gk20a *g = dbg_s->g; 408 struct gk20a *g = dbg_s->g;
409 409
410 mutex_lock(&g->dbg_sessions_lock); 410 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
411 mutex_lock(&dbg_s->ch_list_lock); 411 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
412 list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, ch_entry) 412 list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, ch_entry)
413 dbg_unbind_single_channel_gk20a(dbg_s, ch_data); 413 dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
414 mutex_unlock(&dbg_s->ch_list_lock); 414 nvgpu_mutex_release(&dbg_s->ch_list_lock);
415 mutex_unlock(&g->dbg_sessions_lock); 415 nvgpu_mutex_release(&g->dbg_sessions_lock);
416 416
417 return 0; 417 return 0;
418} 418}
@@ -435,25 +435,25 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
435 return -EINVAL; 435 return -EINVAL;
436 } 436 }
437 437
438 mutex_lock(&dbg_s->ch_list_lock); 438 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
439 list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) { 439 list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) {
440 if (ch->hw_chid == ch_data->chid) { 440 if (ch->hw_chid == ch_data->chid) {
441 channel_found = true; 441 channel_found = true;
442 break; 442 break;
443 } 443 }
444 } 444 }
445 mutex_unlock(&dbg_s->ch_list_lock); 445 nvgpu_mutex_release(&dbg_s->ch_list_lock);
446 446
447 if (!channel_found) { 447 if (!channel_found) {
448 gk20a_dbg_fn("channel not bounded, fd=%d\n", args->channel_fd); 448 gk20a_dbg_fn("channel not bounded, fd=%d\n", args->channel_fd);
449 return -EINVAL; 449 return -EINVAL;
450 } 450 }
451 451
452 mutex_lock(&g->dbg_sessions_lock); 452 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
453 mutex_lock(&dbg_s->ch_list_lock); 453 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
454 err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data); 454 err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
455 mutex_unlock(&dbg_s->ch_list_lock); 455 nvgpu_mutex_release(&dbg_s->ch_list_lock);
456 mutex_unlock(&g->dbg_sessions_lock); 456 nvgpu_mutex_release(&g->dbg_sessions_lock);
457 457
458 return err; 458 return err;
459} 459}
@@ -472,11 +472,11 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
472 * which called powergate/timeout disable ioctl, to be killed without 472 * which called powergate/timeout disable ioctl, to be killed without
473 * calling powergate/timeout enable ioctl 473 * calling powergate/timeout enable ioctl
474 */ 474 */
475 mutex_lock(&g->dbg_sessions_lock); 475 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
476 g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, 476 g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
477 NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE); 477 NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
478 nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); 478 nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
479 mutex_unlock(&g->dbg_sessions_lock); 479 nvgpu_mutex_release(&g->dbg_sessions_lock);
480 480
481 kfree(dbg_s); 481 kfree(dbg_s);
482 return 0; 482 return 0;
@@ -510,8 +510,8 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
510 510
511 gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid); 511 gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid);
512 512
513 mutex_lock(&g->dbg_sessions_lock); 513 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
514 mutex_lock(&ch->dbg_s_lock); 514 nvgpu_mutex_acquire(&ch->dbg_s_lock);
515 515
516 ch_data = kzalloc(sizeof(*ch_data), GFP_KERNEL); 516 ch_data = kzalloc(sizeof(*ch_data), GFP_KERNEL);
517 if (!ch_data) { 517 if (!ch_data) {
@@ -535,12 +535,12 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
535 535
536 list_add(&session_data->dbg_s_entry, &ch->dbg_s_list); 536 list_add(&session_data->dbg_s_entry, &ch->dbg_s_list);
537 537
538 mutex_lock(&dbg_s->ch_list_lock); 538 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
539 list_add_tail(&ch_data->ch_entry, &dbg_s->ch_list); 539 list_add_tail(&ch_data->ch_entry, &dbg_s->ch_list);
540 mutex_unlock(&dbg_s->ch_list_lock); 540 nvgpu_mutex_release(&dbg_s->ch_list_lock);
541 541
542 mutex_unlock(&ch->dbg_s_lock); 542 nvgpu_mutex_release(&ch->dbg_s_lock);
543 mutex_unlock(&g->dbg_sessions_lock); 543 nvgpu_mutex_release(&g->dbg_sessions_lock);
544 544
545 return 0; 545 return 0;
546} 546}
@@ -591,9 +591,9 @@ static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s,
591 591
592 gk20a_dbg_fn("powergate mode = %d", args->enable); 592 gk20a_dbg_fn("powergate mode = %d", args->enable);
593 593
594 mutex_lock(&g->dbg_sessions_lock); 594 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
595 err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); 595 err = nvgpu_dbg_timeout_enable(dbg_s, args->enable);
596 mutex_unlock(&g->dbg_sessions_lock); 596 nvgpu_mutex_release(&g->dbg_sessions_lock);
597 597
598 return err; 598 return err;
599} 599}
@@ -604,9 +604,9 @@ static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s,
604 int status; 604 int status;
605 struct gk20a *g = get_gk20a(dbg_s->dev); 605 struct gk20a *g = get_gk20a(dbg_s->dev);
606 606
607 mutex_lock(&g->dbg_sessions_lock); 607 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
608 status = g->timeouts_enabled; 608 status = g->timeouts_enabled;
609 mutex_unlock(&g->dbg_sessions_lock); 609 nvgpu_mutex_release(&g->dbg_sessions_lock);
610 610
611 if (status) 611 if (status)
612 args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE; 612 args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE;
@@ -620,11 +620,11 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(
620{ 620{
621 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); 621 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
622 622
623 gk20a_dbg_session_mutex_lock(dbg_s); 623 gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s);
624 624
625 dbg_s->broadcast_stop_trigger = (args->broadcast != 0); 625 dbg_s->broadcast_stop_trigger = (args->broadcast != 0);
626 626
627 gk20a_dbg_session_mutex_unlock(dbg_s); 627 gk20a_dbg_session_nvgpu_mutex_release(dbg_s);
628 628
629 return 0; 629 return 0;
630} 630}
@@ -651,12 +651,12 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
651 if (write_size > args->sm_error_state_record_size) 651 if (write_size > args->sm_error_state_record_size)
652 write_size = args->sm_error_state_record_size; 652 write_size = args->sm_error_state_record_size;
653 653
654 mutex_lock(&g->dbg_sessions_lock); 654 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
655 err = copy_to_user((void __user *)(uintptr_t) 655 err = copy_to_user((void __user *)(uintptr_t)
656 args->sm_error_state_record_mem, 656 args->sm_error_state_record_mem,
657 sm_error_state, 657 sm_error_state,
658 write_size); 658 write_size);
659 mutex_unlock(&g->dbg_sessions_lock); 659 nvgpu_mutex_release(&g->dbg_sessions_lock);
660 if (err) { 660 if (err) {
661 gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n"); 661 gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n");
662 return err; 662 return err;
@@ -728,12 +728,12 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
728 if (read_size > args->sm_error_state_record_size) 728 if (read_size > args->sm_error_state_record_size)
729 read_size = args->sm_error_state_record_size; 729 read_size = args->sm_error_state_record_size;
730 730
731 mutex_lock(&g->dbg_sessions_lock); 731 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
732 err = copy_from_user(sm_error_state, 732 err = copy_from_user(sm_error_state,
733 (void __user *)(uintptr_t) 733 (void __user *)(uintptr_t)
734 args->sm_error_state_record_mem, 734 args->sm_error_state_record_mem,
735 read_size); 735 read_size);
736 mutex_unlock(&g->dbg_sessions_lock); 736 nvgpu_mutex_release(&g->dbg_sessions_lock);
737 if (err) { 737 if (err) {
738 err = -ENOMEM; 738 err = -ENOMEM;
739 goto err_free; 739 goto err_free;
@@ -901,7 +901,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
901 } 901 }
902 902
903 /* protect from threaded user space calls */ 903 /* protect from threaded user space calls */
904 mutex_lock(&dbg_s->ioctl_lock); 904 nvgpu_mutex_acquire(&dbg_s->ioctl_lock);
905 905
906 switch (cmd) { 906 switch (cmd) {
907 case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: 907 case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
@@ -1007,7 +1007,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
1007 break; 1007 break;
1008 } 1008 }
1009 1009
1010 mutex_unlock(&dbg_s->ioctl_lock); 1010 nvgpu_mutex_release(&dbg_s->ioctl_lock);
1011 1011
1012 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); 1012 gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
1013 1013
@@ -1032,9 +1032,9 @@ static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
1032{ 1032{
1033 int err; 1033 int err;
1034 1034
1035 mutex_lock(&gr->ctx_mutex); 1035 nvgpu_mutex_acquire(&gr->ctx_mutex);
1036 err = !gr->ctx_vars.golden_image_initialized; 1036 err = !gr->ctx_vars.golden_image_initialized;
1037 mutex_unlock(&gr->ctx_mutex); 1037 nvgpu_mutex_release(&gr->ctx_mutex);
1038 if (err) 1038 if (err)
1039 return false; 1039 return false;
1040 return true; 1040 return true;
@@ -1089,7 +1089,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1089 /* since exec_reg_ops sends methods to the ucode, it must take the 1089 /* since exec_reg_ops sends methods to the ucode, it must take the
1090 * global gpu lock to protect against mixing methods from debug sessions 1090 * global gpu lock to protect against mixing methods from debug sessions
1091 * on other channels */ 1091 * on other channels */
1092 mutex_lock(&g->dbg_sessions_lock); 1092 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1093 1093
1094 if (!dbg_s->is_pg_disabled && !gk20a_gpu_is_virtual(dbg_s->dev)) { 1094 if (!dbg_s->is_pg_disabled && !gk20a_gpu_is_virtual(dbg_s->dev)) {
1095 /* In the virtual case, the server will handle 1095 /* In the virtual case, the server will handle
@@ -1150,7 +1150,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1150 } 1150 }
1151 } 1151 }
1152 1152
1153 mutex_unlock(&g->dbg_sessions_lock); 1153 nvgpu_mutex_release(&g->dbg_sessions_lock);
1154 1154
1155 if (!err && powergate_err) 1155 if (!err && powergate_err)
1156 err = powergate_err; 1156 err = powergate_err;
@@ -1276,9 +1276,9 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
1276 gk20a_dbg_fn("%s powergate mode = %d", 1276 gk20a_dbg_fn("%s powergate mode = %d",
1277 dev_name(dbg_s->dev), args->mode); 1277 dev_name(dbg_s->dev), args->mode);
1278 1278
1279 mutex_lock(&g->dbg_sessions_lock); 1279 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1280 err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, args->mode); 1280 err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, args->mode);
1281 mutex_unlock(&g->dbg_sessions_lock); 1281 nvgpu_mutex_release(&g->dbg_sessions_lock);
1282 return err; 1282 return err;
1283} 1283}
1284 1284
@@ -1299,7 +1299,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1299 } 1299 }
1300 1300
1301 /* Take the global lock, since we'll be doing global regops */ 1301 /* Take the global lock, since we'll be doing global regops */
1302 mutex_lock(&g->dbg_sessions_lock); 1302 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1303 1303
1304 ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); 1304 ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1305 if (!ch_gk20a) { 1305 if (!ch_gk20a) {
@@ -1319,7 +1319,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1319 1319
1320 err = g->ops.regops.apply_smpc_war(dbg_s); 1320 err = g->ops.regops.apply_smpc_war(dbg_s);
1321 clean_up: 1321 clean_up:
1322 mutex_unlock(&g->dbg_sessions_lock); 1322 nvgpu_mutex_release(&g->dbg_sessions_lock);
1323 gk20a_idle(g->dev); 1323 gk20a_idle(g->dev);
1324 return err; 1324 return err;
1325} 1325}
@@ -1341,7 +1341,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1341 } 1341 }
1342 1342
1343 /* Take the global lock, since we'll be doing global regops */ 1343 /* Take the global lock, since we'll be doing global regops */
1344 mutex_lock(&g->dbg_sessions_lock); 1344 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1345 1345
1346 ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); 1346 ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s);
1347 if (!ch_gk20a) { 1347 if (!ch_gk20a) {
@@ -1361,7 +1361,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
1361 * added here with gk20a being deprecated 1361 * added here with gk20a being deprecated
1362 */ 1362 */
1363 clean_up: 1363 clean_up:
1364 mutex_unlock(&g->dbg_sessions_lock); 1364 nvgpu_mutex_release(&g->dbg_sessions_lock);
1365 gk20a_idle(g->dev); 1365 gk20a_idle(g->dev);
1366 return err; 1366 return err;
1367} 1367}
@@ -1386,7 +1386,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
1386 return err; 1386 return err;
1387 } 1387 }
1388 1388
1389 mutex_lock(&g->dbg_sessions_lock); 1389 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1390 1390
1391 /* Suspend GPU context switching */ 1391 /* Suspend GPU context switching */
1392 err = gr_gk20a_disable_ctxsw(g); 1392 err = gr_gk20a_disable_ctxsw(g);
@@ -1411,7 +1411,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
1411 gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n"); 1411 gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n");
1412 1412
1413clean_up: 1413clean_up:
1414 mutex_unlock(&g->dbg_sessions_lock); 1414 nvgpu_mutex_release(&g->dbg_sessions_lock);
1415 gk20a_idle(g->dev); 1415 gk20a_idle(g->dev);
1416 1416
1417 return err; 1417 return err;
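The conversion in dbg_gpu_gk20a.c preserves the existing lock ordering: the per-GPU dbg_sessions_lock is always acquired before the per-session ch_list_lock and released in reverse order (see dbg_unbind_all_channels_gk20a() and dbg_bind_channel_gk20a() above). A small sketch of that ordering with the nvgpu wrappers; gpu_state, dbg_session and session_unbind_all() are hypothetical stand-ins, not driver symbols:

#include <linux/list.h>
#include <nvgpu/lock.h>

/* Hypothetical stand-ins for the per-GPU and per-session objects. */
struct gpu_state {
	struct nvgpu_mutex dbg_sessions_lock;	/* outer, per-GPU lock */
};

struct dbg_session {
	struct gpu_state *g;
	struct nvgpu_mutex ch_list_lock;	/* inner, per-session lock */
	struct list_head ch_list;
};

/* Take the per-GPU lock first, then the per-session list lock, and release
 * in reverse order, matching the converted dbg_unbind_all_channels_gk20a(). */
static void session_unbind_all(struct dbg_session *s)
{
	nvgpu_mutex_acquire(&s->g->dbg_sessions_lock);
	nvgpu_mutex_acquire(&s->ch_list_lock);
	/* ... walk s->ch_list and unbind each entry here ... */
	nvgpu_mutex_release(&s->ch_list_lock);
	nvgpu_mutex_release(&s->g->dbg_sessions_lock);
}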
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
index 773a669c..caa9395b 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
@@ -67,14 +67,14 @@ struct dbg_session_gk20a {
67 67
68 /* list of bound channels, if any */ 68 /* list of bound channels, if any */
69 struct list_head ch_list; 69 struct list_head ch_list;
70 struct mutex ch_list_lock; 70 struct nvgpu_mutex ch_list_lock;
71 71
72 /* event support */ 72 /* event support */
73 struct dbg_gpu_session_events dbg_events; 73 struct dbg_gpu_session_events dbg_events;
74 74
75 bool broadcast_stop_trigger; 75 bool broadcast_stop_trigger;
76 76
77 struct mutex ioctl_lock; 77 struct nvgpu_mutex ioctl_lock;
78}; 78};
79 79
80struct dbg_session_data { 80struct dbg_session_data {
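The header change above only swaps the member types (struct mutex to struct nvgpu_mutex); the locks stay embedded in dbg_session_gk20a, are still initialized at open time, and the ioctl_lock is still taken around the ioctl dispatch. A hedged sketch of that lifecycle with hypothetical session/session_open/session_ioctl names:

#include <linux/errno.h>
#include <nvgpu/lock.h>

/* Hypothetical session object with the two locks the header declares. */
struct session {
	struct nvgpu_mutex ch_list_lock;	/* protects the bound-channel list */
	struct nvgpu_mutex ioctl_lock;		/* serializes threaded ioctl callers */
};

static void session_open(struct session *s)
{
	nvgpu_mutex_init(&s->ch_list_lock);
	nvgpu_mutex_init(&s->ioctl_lock);
}

static long session_ioctl(struct session *s, unsigned int cmd)
{
	long err;

	/* protect from threaded user space calls, as in gk20a_dbg_gpu_dev_ioctl() */
	nvgpu_mutex_acquire(&s->ioctl_lock);
	switch (cmd) {
	default:
		err = -ENOTTY;
		break;
	}
	nvgpu_mutex_release(&s->ioctl_lock);
	return err;
}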
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index d20229b3..4b8e61c4 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -421,7 +421,7 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
421#endif 421#endif
422 422
423#ifdef CONFIG_DEBUG_FS 423#ifdef CONFIG_DEBUG_FS
424 spin_lock_init(&g->debugfs_lock); 424 nvgpu_spinlock_init(&g->debugfs_lock);
425 425
426 g->mm.ltc_enabled = true; 426 g->mm.ltc_enabled = true;
427 g->mm.ltc_enabled_debug = true; 427 g->mm.ltc_enabled_debug = true;
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 1f86fd8f..8244403e 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -60,8 +60,8 @@ struct gk20a_fecs_trace {
60 60
61 struct mem_desc trace_buf; 61 struct mem_desc trace_buf;
62 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); 62 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
63 struct mutex hash_lock; 63 struct nvgpu_mutex hash_lock;
64 struct mutex poll_lock; 64 struct nvgpu_mutex poll_lock;
65 struct task_struct *poll_task; 65 struct task_struct *poll_task;
66}; 66};
67 67
@@ -133,14 +133,14 @@ void gk20a_fecs_trace_hash_dump(struct gk20a *g)
133 133
134 gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table"); 134 gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table");
135 135
136 mutex_lock(&trace->hash_lock); 136 nvgpu_mutex_acquire(&trace->hash_lock);
137 hash_for_each(trace->pid_hash_table, bkt, ent, node) 137 hash_for_each(trace->pid_hash_table, bkt, ent, node)
138 { 138 {
139 gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d", 139 gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
140 ent, bkt, ent->context_ptr, ent->pid); 140 ent, bkt, ent->context_ptr, ent->pid);
141 141
142 } 142 }
143 mutex_unlock(&trace->hash_lock); 143 nvgpu_mutex_release(&trace->hash_lock);
144} 144}
145 145
146static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid) 146static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
@@ -161,9 +161,9 @@ static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid
161 161
162 he->context_ptr = context_ptr; 162 he->context_ptr = context_ptr;
163 he->pid = pid; 163 he->pid = pid;
164 mutex_lock(&trace->hash_lock); 164 nvgpu_mutex_acquire(&trace->hash_lock);
165 hash_add(trace->pid_hash_table, &he->node, context_ptr); 165 hash_add(trace->pid_hash_table, &he->node, context_ptr);
166 mutex_unlock(&trace->hash_lock); 166 nvgpu_mutex_release(&trace->hash_lock);
167 return 0; 167 return 0;
168} 168}
169 169
@@ -176,7 +176,7 @@ static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
176 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, 176 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
177 "freeing hash entry context_ptr=%x", context_ptr); 177 "freeing hash entry context_ptr=%x", context_ptr);
178 178
179 mutex_lock(&trace->hash_lock); 179 nvgpu_mutex_acquire(&trace->hash_lock);
180 hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node, 180 hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
181 context_ptr) { 181 context_ptr) {
182 if (ent->context_ptr == context_ptr) { 182 if (ent->context_ptr == context_ptr) {
@@ -188,7 +188,7 @@ static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
188 break; 188 break;
189 } 189 }
190 } 190 }
191 mutex_unlock(&trace->hash_lock); 191 nvgpu_mutex_release(&trace->hash_lock);
192} 192}
193 193
194static void gk20a_fecs_trace_free_hash_table(struct gk20a *g) 194static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
@@ -200,12 +200,12 @@ static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
200 200
201 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace); 201 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);
202 202
203 mutex_lock(&trace->hash_lock); 203 nvgpu_mutex_acquire(&trace->hash_lock);
204 hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) { 204 hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
205 hash_del(&ent->node); 205 hash_del(&ent->node);
206 kfree(ent); 206 kfree(ent);
207 } 207 }
208 mutex_unlock(&trace->hash_lock); 208 nvgpu_mutex_release(&trace->hash_lock);
209 209
210} 210}
211 211
@@ -215,7 +215,7 @@ static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
215 struct gk20a_fecs_trace *trace = g->fecs_trace; 215 struct gk20a_fecs_trace *trace = g->fecs_trace;
216 pid_t pid = 0; 216 pid_t pid = 0;
217 217
218 mutex_lock(&trace->hash_lock); 218 nvgpu_mutex_acquire(&trace->hash_lock);
219 hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) { 219 hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
220 if (ent->context_ptr == context_ptr) { 220 if (ent->context_ptr == context_ptr) {
221 gk20a_dbg(gpu_dbg_ctxsw, 221 gk20a_dbg(gpu_dbg_ctxsw,
@@ -225,7 +225,7 @@ static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
225 break; 225 break;
226 } 226 }
227 } 227 }
228 mutex_unlock(&trace->hash_lock); 228 nvgpu_mutex_release(&trace->hash_lock);
229 229
230 return pid; 230 return pid;
231} 231}
@@ -336,7 +336,7 @@ static int gk20a_fecs_trace_poll(struct gk20a *g)
336 if (unlikely(err)) 336 if (unlikely(err))
337 return err; 337 return err;
338 338
339 mutex_lock(&trace->poll_lock); 339 nvgpu_mutex_acquire(&trace->poll_lock);
340 write = gk20a_fecs_trace_get_write_index(g); 340 write = gk20a_fecs_trace_get_write_index(g);
341 if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) { 341 if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
342 gk20a_err(dev_from_gk20a(g), 342 gk20a_err(dev_from_gk20a(g),
@@ -371,7 +371,7 @@ static int gk20a_fecs_trace_poll(struct gk20a *g)
371 gk20a_fecs_trace_set_read_index(g, read); 371 gk20a_fecs_trace_set_read_index(g, read);
372 372
373done: 373done:
374 mutex_unlock(&trace->poll_lock); 374 nvgpu_mutex_release(&trace->poll_lock);
375 gk20a_idle(g->dev); 375 gk20a_idle(g->dev);
376 return err; 376 return err;
377} 377}
@@ -580,8 +580,8 @@ static int gk20a_fecs_trace_init(struct gk20a *g)
580 goto clean; 580 goto clean;
581 } 581 }
582 582
583 mutex_init(&trace->poll_lock); 583 nvgpu_mutex_init(&trace->poll_lock);
584 mutex_init(&trace->hash_lock); 584 nvgpu_mutex_init(&trace->hash_lock);
585 hash_init(trace->pid_hash_table); 585 hash_init(trace->pid_hash_table);
586 586
587 gk20a_fecs_trace_debugfs_init(g); 587 gk20a_fecs_trace_debugfs_init(g);
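In fecs_trace_gk20a.c the same wrapper substitution covers a hash table: hash_lock guards every hash_add()/hash_for_each_*()/hash_del() on pid_hash_table, while poll_lock serializes the poll path. A condensed sketch of the hash-table half, assuming only the wrapper calls shown above; demo_trace, demo_hash_add, demo_hash_find and DEMO_HASH_BITS are made-up names:

#include <linux/errno.h>
#include <linux/hashtable.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <nvgpu/lock.h>

#define DEMO_HASH_BITS 4	/* stand-in for GK20A_FECS_TRACE_HASH_BITS */

struct demo_entry {
	u32 context_ptr;
	pid_t pid;
	struct hlist_node node;
};

struct demo_trace {
	DECLARE_HASHTABLE(pid_hash_table, DEMO_HASH_BITS);
	struct nvgpu_mutex hash_lock;	/* guards every lookup and mutation */
};

static void demo_trace_init(struct demo_trace *t)
{
	nvgpu_mutex_init(&t->hash_lock);	/* was mutex_init() */
	hash_init(t->pid_hash_table);
}

static int demo_hash_add(struct demo_trace *t, u32 context_ptr, pid_t pid)
{
	struct demo_entry *e = kzalloc(sizeof(*e), GFP_KERNEL);

	if (!e)
		return -ENOMEM;
	e->context_ptr = context_ptr;
	e->pid = pid;
	nvgpu_mutex_acquire(&t->hash_lock);
	hash_add(t->pid_hash_table, &e->node, context_ptr);
	nvgpu_mutex_release(&t->hash_lock);
	return 0;
}

static pid_t demo_hash_find(struct demo_trace *t, u32 context_ptr)
{
	struct demo_entry *e;
	pid_t pid = 0;

	nvgpu_mutex_acquire(&t->hash_lock);
	hash_for_each_possible(t->pid_hash_table, e, node, context_ptr) {
		if (e->context_ptr == context_ptr) {
			pid = e->pid;
			break;
		}
	}
	nvgpu_mutex_release(&t->hash_lock);
	return pid;
}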
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 4a32194c..c245f4a2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -477,7 +477,7 @@ void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
477 kfree(runlist->active_tsgs); 477 kfree(runlist->active_tsgs);
478 runlist->active_tsgs = NULL; 478 runlist->active_tsgs = NULL;
479 479
480 mutex_destroy(&runlist->mutex); 480 nvgpu_mutex_destroy(&runlist->mutex);
481 481
482 } 482 }
483 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) * 483 memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
@@ -650,7 +650,7 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
650 goto clean_up_runlist; 650 goto clean_up_runlist;
651 } 651 }
652 } 652 }
653 mutex_init(&runlist->mutex); 653 nvgpu_mutex_init(&runlist->mutex);
654 654
655 /* None of buffers is pinned if this value doesn't change. 655 /* None of buffers is pinned if this value doesn't change.
656 Otherwise, one of them (cur_buffer) must have been pinned. */ 656 Otherwise, one of them (cur_buffer) must have been pinned. */
@@ -809,8 +809,8 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
809 809
810 f->g = g; 810 f->g = g;
811 811
812 mutex_init(&f->intr.isr.mutex); 812 nvgpu_mutex_init(&f->intr.isr.mutex);
813 mutex_init(&f->gr_reset_mutex); 813 nvgpu_mutex_init(&f->gr_reset_mutex);
814 gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */ 814 gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */
815 815
816 f->num_channels = g->ops.fifo.get_num_fifos(g); 816 f->num_channels = g->ops.fifo.get_num_fifos(g);
@@ -846,7 +846,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
846 init_runlist(g, f); 846 init_runlist(g, f);
847 847
848 INIT_LIST_HEAD(&f->free_chs); 848 INIT_LIST_HEAD(&f->free_chs);
849 mutex_init(&f->free_chs_mutex); 849 nvgpu_mutex_init(&f->free_chs_mutex);
850 850
851 if (g->ops.mm.is_bar1_supported(g)) 851 if (g->ops.mm.is_bar1_supported(g))
852 err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm, 852 err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm,
@@ -871,12 +871,12 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
871 gk20a_init_channel_support(g, chid); 871 gk20a_init_channel_support(g, chid);
872 gk20a_init_tsg_support(g, chid); 872 gk20a_init_tsg_support(g, chid);
873 } 873 }
874 mutex_init(&f->tsg_inuse_mutex); 874 nvgpu_mutex_init(&f->tsg_inuse_mutex);
875 875
876 f->remove_support = gk20a_remove_fifo_support; 876 f->remove_support = gk20a_remove_fifo_support;
877 877
878 f->deferred_reset_pending = false; 878 f->deferred_reset_pending = false;
879 mutex_init(&f->deferred_reset_mutex); 879 nvgpu_mutex_init(&f->deferred_reset_mutex);
880 880
881 f->sw_ready = true; 881 f->sw_ready = true;
882 882
@@ -1224,7 +1224,7 @@ static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
1224 if (!ch) 1224 if (!ch)
1225 return verbose; 1225 return verbose;
1226 1226
1227 mutex_lock(&ch->error_notifier_mutex); 1227 nvgpu_mutex_acquire(&ch->error_notifier_mutex);
1228 if (ch->error_notifier_ref) { 1228 if (ch->error_notifier_ref) {
1229 u32 err = ch->error_notifier->info32; 1229 u32 err = ch->error_notifier->info32;
1230 if (ch->error_notifier->status == 0xffff) { 1230 if (ch->error_notifier->status == 0xffff) {
@@ -1240,7 +1240,7 @@ static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
1240 NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); 1240 NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
1241 } 1241 }
1242 } 1242 }
1243 mutex_unlock(&ch->error_notifier_mutex); 1243 nvgpu_mutex_release(&ch->error_notifier_mutex);
1244 1244
1245 /* mark channel as faulted */ 1245 /* mark channel as faulted */
1246 ch->has_timedout = true; 1246 ch->has_timedout = true;
@@ -1309,7 +1309,7 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
1309{ 1309{
1310 u32 engine_id, engines; 1310 u32 engine_id, engines;
1311 1311
1312 mutex_lock(&g->dbg_sessions_lock); 1312 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1313 gr_gk20a_disable_ctxsw(g); 1313 gr_gk20a_disable_ctxsw(g);
1314 1314
1315 if (!g->fifo.deferred_reset_pending) 1315 if (!g->fifo.deferred_reset_pending)
@@ -1336,7 +1336,7 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
1336 1336
1337clean_up: 1337clean_up:
1338 gr_gk20a_enable_ctxsw(g); 1338 gr_gk20a_enable_ctxsw(g);
1339 mutex_unlock(&g->dbg_sessions_lock); 1339 nvgpu_mutex_release(&g->dbg_sessions_lock);
1340 1340
1341 return 0; 1341 return 0;
1342} 1342}
@@ -1487,9 +1487,9 @@ static bool gk20a_fifo_handle_mmu_fault(
1487 } else if (engine_id != FIFO_INVAL_ENGINE_ID) { 1487 } else if (engine_id != FIFO_INVAL_ENGINE_ID) {
1488 /* if lock is already taken, a reset is taking place 1488 /* if lock is already taken, a reset is taking place
1489 so no need to repeat */ 1489 so no need to repeat */
1490 if (mutex_trylock(&g->fifo.gr_reset_mutex)) { 1490 if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) {
1491 gk20a_fifo_reset_engine(g, engine_id); 1491 gk20a_fifo_reset_engine(g, engine_id);
1492 mutex_unlock(&g->fifo.gr_reset_mutex); 1492 nvgpu_mutex_release(&g->fifo.gr_reset_mutex);
1493 } 1493 }
1494 } 1494 }
1495 1495
@@ -1646,7 +1646,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
1646 1646
1647 /* stop context switching to prevent engine assignments from 1647 /* stop context switching to prevent engine assignments from
1648 changing until channel is recovered */ 1648 changing until channel is recovered */
1649 mutex_lock(&g->dbg_sessions_lock); 1649 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1650 gr_gk20a_disable_ctxsw(g); 1650 gr_gk20a_disable_ctxsw(g);
1651 1651
1652 engines = gk20a_fifo_engines_on_id(g, hw_chid, false); 1652 engines = gk20a_fifo_engines_on_id(g, hw_chid, false);
@@ -1667,7 +1667,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
1667 } 1667 }
1668 1668
1669 gr_gk20a_enable_ctxsw(g); 1669 gr_gk20a_enable_ctxsw(g);
1670 mutex_unlock(&g->dbg_sessions_lock); 1670 nvgpu_mutex_release(&g->dbg_sessions_lock);
1671} 1671}
1672 1672
1673void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) 1673void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
@@ -1676,7 +1676,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
1676 1676
1677 /* stop context switching to prevent engine assignments from 1677 /* stop context switching to prevent engine assignments from
1678 changing until TSG is recovered */ 1678 changing until TSG is recovered */
1679 mutex_lock(&g->dbg_sessions_lock); 1679 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1680 gr_gk20a_disable_ctxsw(g); 1680 gr_gk20a_disable_ctxsw(g);
1681 1681
1682 engines = gk20a_fifo_engines_on_id(g, tsgid, true); 1682 engines = gk20a_fifo_engines_on_id(g, tsgid, true);
@@ -1693,7 +1693,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
1693 } 1693 }
1694 1694
1695 gr_gk20a_enable_ctxsw(g); 1695 gr_gk20a_enable_ctxsw(g);
1696 mutex_unlock(&g->dbg_sessions_lock); 1696 nvgpu_mutex_release(&g->dbg_sessions_lock);
1697} 1697}
1698 1698
1699void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, 1699void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
@@ -2307,7 +2307,7 @@ void gk20a_fifo_isr(struct gk20a *g)
2307 if (g->fifo.sw_ready) { 2307 if (g->fifo.sw_ready) {
2308 /* note we're not actually in an "isr", but rather 2308 /* note we're not actually in an "isr", but rather
2309 * in a threaded interrupt context... */ 2309 * in a threaded interrupt context... */
2310 mutex_lock(&g->fifo.intr.isr.mutex); 2310 nvgpu_mutex_acquire(&g->fifo.intr.isr.mutex);
2311 2311
2312 gk20a_dbg(gpu_dbg_intr, "fifo isr %08x\n", fifo_intr); 2312 gk20a_dbg(gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);
2313 2313
@@ -2322,7 +2322,7 @@ void gk20a_fifo_isr(struct gk20a *g)
2322 if (unlikely(fifo_intr & error_intr_mask)) 2322 if (unlikely(fifo_intr & error_intr_mask))
2323 clear_intr = fifo_error_isr(g, fifo_intr); 2323 clear_intr = fifo_error_isr(g, fifo_intr);
2324 2324
2325 mutex_unlock(&g->fifo.intr.isr.mutex); 2325 nvgpu_mutex_release(&g->fifo.intr.isr.mutex);
2326 } 2326 }
2327 gk20a_writel(g, fifo_intr_0_r(), clear_intr); 2327 gk20a_writel(g, fifo_intr_0_r(), clear_intr);
2328 2328
@@ -2434,7 +2434,7 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
2434 2434
2435 /* we have no idea which runlist we are using. lock all */ 2435 /* we have no idea which runlist we are using. lock all */
2436 for (i = 0; i < g->fifo.max_runlists; i++) 2436 for (i = 0; i < g->fifo.max_runlists; i++)
2437 mutex_lock(&f->runlist_info[i].mutex); 2437 nvgpu_mutex_acquire(&f->runlist_info[i].mutex);
2438 2438
2439 mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); 2439 mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
2440 2440
@@ -2444,7 +2444,7 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
2444 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); 2444 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
2445 2445
2446 for (i = 0; i < g->fifo.max_runlists; i++) 2446 for (i = 0; i < g->fifo.max_runlists; i++)
2447 mutex_unlock(&f->runlist_info[i].mutex); 2447 nvgpu_mutex_release(&f->runlist_info[i].mutex);
2448 2448
2449 return ret; 2449 return ret;
2450} 2450}
@@ -2461,7 +2461,7 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
2461 2461
2462 /* we have no idea which runlist we are using. lock all */ 2462 /* we have no idea which runlist we are using. lock all */
2463 for (i = 0; i < g->fifo.max_runlists; i++) 2463 for (i = 0; i < g->fifo.max_runlists; i++)
2464 mutex_lock(&f->runlist_info[i].mutex); 2464 nvgpu_mutex_acquire(&f->runlist_info[i].mutex);
2465 2465
2466 mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); 2466 mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
2467 2467
@@ -2471,7 +2471,7 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
2471 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); 2471 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
2472 2472
2473 for (i = 0; i < g->fifo.max_runlists; i++) 2473 for (i = 0; i < g->fifo.max_runlists; i++)
2474 mutex_unlock(&f->runlist_info[i].mutex); 2474 nvgpu_mutex_release(&f->runlist_info[i].mutex);
2475 2475
2476 return ret; 2476 return ret;
2477} 2477}
@@ -3046,7 +3046,7 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid,
3046 3046
3047 runlist = &f->runlist_info[runlist_id]; 3047 runlist = &f->runlist_info[runlist_id];
3048 3048
3049 mutex_lock(&runlist->mutex); 3049 nvgpu_mutex_acquire(&runlist->mutex);
3050 3050
3051 mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); 3051 mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3052 3052
@@ -3056,7 +3056,7 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid,
3056 if (!mutex_ret) 3056 if (!mutex_ret)
3057 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); 3057 pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
3058 3058
3059 mutex_unlock(&runlist->mutex); 3059 nvgpu_mutex_release(&runlist->mutex);
3060 return ret; 3060 return ret;
3061} 3061}
3062 3062
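fifo_gk20a.c also exercises the non-blocking and teardown ends of the API: nvgpu_mutex_tryacquire() replaces mutex_trylock() in gk20a_fifo_handle_mmu_fault() so a reset already in flight is not repeated, and nvgpu_mutex_destroy() replaces mutex_destroy() in gk20a_fifo_delete_runlist(). A sketch of that trylock-or-skip idiom plus the init/destroy lifecycle, using a hypothetical engine struct in place of the real fifo state:

#include <nvgpu/lock.h>

/* Hypothetical engine-reset state; plays the role of fifo.gr_reset_mutex. */
struct engine {
	struct nvgpu_mutex reset_lock;
};

static void engine_setup(struct engine *e)
{
	nvgpu_mutex_init(&e->reset_lock);	/* was mutex_init() */
}

/* If the lock is already held, a reset is already in flight, so skip the
 * duplicate work: the nvgpu_mutex_tryacquire() idiom used in
 * gk20a_fifo_handle_mmu_fault() above; tryacquire is nonzero on success. */
static void engine_maybe_reset(struct engine *e)
{
	if (nvgpu_mutex_tryacquire(&e->reset_lock)) {
		/* ... perform the engine reset here ... */
		nvgpu_mutex_release(&e->reset_lock);
	}
}

static void engine_teardown(struct engine *e)
{
	nvgpu_mutex_destroy(&e->reset_lock);	/* was mutex_destroy() */
}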
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 20baf9de..1a248dba 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -41,7 +41,7 @@ struct fifo_runlist_info_gk20a {
41 u32 total_entries; 41 u32 total_entries;
42 bool stopped; 42 bool stopped;
43 bool support_tsg; 43 bool support_tsg;
44 struct mutex mutex; /* protect channel preempt and runlist upate */ 44 struct nvgpu_mutex mutex; /* protect channel preempt and runlist update */
45}; 45};
46 46
47enum { 47enum {
@@ -120,18 +120,18 @@ struct fifo_gk20a {
120 struct channel_gk20a *channel; 120 struct channel_gk20a *channel;
121 /* zero-kref'd channels here */ 121 /* zero-kref'd channels here */
122 struct list_head free_chs; 122 struct list_head free_chs;
123 struct mutex free_chs_mutex; 123 struct nvgpu_mutex free_chs_mutex;
124 struct mutex gr_reset_mutex; 124 struct nvgpu_mutex gr_reset_mutex;
125 125
126 struct tsg_gk20a *tsg; 126 struct tsg_gk20a *tsg;
127 struct mutex tsg_inuse_mutex; 127 struct nvgpu_mutex tsg_inuse_mutex;
128 128
129 void (*remove_support)(struct fifo_gk20a *); 129 void (*remove_support)(struct fifo_gk20a *);
130 bool sw_ready; 130 bool sw_ready;
131 struct { 131 struct {
132 /* share info between isrs and non-isr code */ 132 /* share info between isrs and non-isr code */
133 struct { 133 struct {
134 struct mutex mutex; 134 struct nvgpu_mutex mutex;
135 } isr; 135 } isr;
136 struct { 136 struct {
137 u32 device_fatal_0; 137 u32 device_fatal_0;
@@ -147,7 +147,7 @@ struct fifo_gk20a {
147 147
148 unsigned long deferred_fault_engines; 148 unsigned long deferred_fault_engines;
149 bool deferred_reset_pending; 149 bool deferred_reset_pending;
150 struct mutex deferred_reset_mutex; 150 struct nvgpu_mutex deferred_reset_mutex;
151}; 151};
152 152
153static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level) 153static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 79c3fd09..32570d3d 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -33,7 +33,7 @@
33#include <linux/thermal.h> 33#include <linux/thermal.h>
34#include <asm/cacheflush.h> 34#include <asm/cacheflush.h>
35#include <linux/debugfs.h> 35#include <linux/debugfs.h>
36#include <linux/spinlock.h> 36#include <nvgpu/lock.h>
37#include <linux/clk/tegra.h> 37#include <linux/clk/tegra.h>
38#include <linux/kthread.h> 38#include <linux/kthread.h>
39#include <linux/platform/tegra/common.h> 39#include <linux/platform/tegra/common.h>
@@ -795,13 +795,13 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
795 795
796 gk20a_dbg_fn(""); 796 gk20a_dbg_fn("");
797 797
798 mutex_lock(&g->poweroff_lock); 798 nvgpu_mutex_acquire(&g->poweroff_lock);
799 799
800 if (!g->power_on) 800 if (!g->power_on)
801 goto done; 801 goto done;
802 802
803 if (gk20a_fifo_is_engine_busy(g)) { 803 if (gk20a_fifo_is_engine_busy(g)) {
804 mutex_unlock(&g->poweroff_lock); 804 nvgpu_mutex_release(&g->poweroff_lock);
805 return -EBUSY; 805 return -EBUSY;
806 } 806 }
807 gk20a_scale_suspend(dev); 807 gk20a_scale_suspend(dev);
@@ -844,7 +844,7 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
844 gk20a_lockout_registers(g); 844 gk20a_lockout_registers(g);
845 845
846done: 846done:
847 mutex_unlock(&g->poweroff_lock); 847 nvgpu_mutex_release(&g->poweroff_lock);
848 848
849 return ret; 849 return ret;
850} 850}
@@ -1373,9 +1373,9 @@ static int gk20a_pm_unrailgate(struct device *dev)
1373 trace_gk20a_pm_unrailgate(dev_name(dev)); 1373 trace_gk20a_pm_unrailgate(dev_name(dev));
1374 1374
1375 if (platform->unrailgate) { 1375 if (platform->unrailgate) {
1376 mutex_lock(&platform->railgate_lock); 1376 nvgpu_mutex_acquire(&platform->railgate_lock);
1377 ret = platform->unrailgate(dev); 1377 ret = platform->unrailgate(dev);
1378 mutex_unlock(&platform->railgate_lock); 1378 nvgpu_mutex_release(&platform->railgate_lock);
1379 } 1379 }
1380 1380
1381#ifdef CONFIG_DEBUG_FS 1381#ifdef CONFIG_DEBUG_FS
@@ -1896,11 +1896,11 @@ void gk20a_disable(struct gk20a *g, u32 units)
1896 1896
1897 gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units); 1897 gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units);
1898 1898
1899 spin_lock(&g->mc_enable_lock); 1899 nvgpu_spinlock_acquire(&g->mc_enable_lock);
1900 pmc = gk20a_readl(g, mc_enable_r()); 1900 pmc = gk20a_readl(g, mc_enable_r());
1901 pmc &= ~units; 1901 pmc &= ~units;
1902 gk20a_writel(g, mc_enable_r(), pmc); 1902 gk20a_writel(g, mc_enable_r(), pmc);
1903 spin_unlock(&g->mc_enable_lock); 1903 nvgpu_spinlock_release(&g->mc_enable_lock);
1904} 1904}
1905 1905
1906void gk20a_enable(struct gk20a *g, u32 units) 1906void gk20a_enable(struct gk20a *g, u32 units)
@@ -1909,12 +1909,12 @@ void gk20a_enable(struct gk20a *g, u32 units)
1909 1909
1910 gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units); 1910 gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units);
1911 1911
1912 spin_lock(&g->mc_enable_lock); 1912 nvgpu_spinlock_acquire(&g->mc_enable_lock);
1913 pmc = gk20a_readl(g, mc_enable_r()); 1913 pmc = gk20a_readl(g, mc_enable_r());
1914 pmc |= units; 1914 pmc |= units;
1915 gk20a_writel(g, mc_enable_r(), pmc); 1915 gk20a_writel(g, mc_enable_r(), pmc);
1916 gk20a_readl(g, mc_enable_r()); 1916 gk20a_readl(g, mc_enable_r());
1917 spin_unlock(&g->mc_enable_lock); 1917 nvgpu_spinlock_release(&g->mc_enable_lock);
1918 1918
1919 udelay(20); 1919 udelay(20);
1920} 1920}
@@ -1953,7 +1953,7 @@ int __gk20a_do_idle(struct device *dev, bool force_reset)
1953 down_write(&g->busy_lock); 1953 down_write(&g->busy_lock);
1954 1954
1955 /* acquire railgate lock to prevent unrailgate in midst of do_idle() */ 1955 /* acquire railgate lock to prevent unrailgate in midst of do_idle() */
1956 mutex_lock(&platform->railgate_lock); 1956 nvgpu_mutex_acquire(&platform->railgate_lock);
1957 1957
1958 /* check if it is already railgated ? */ 1958 /* check if it is already railgated ? */
1959 if (platform->is_railgated(dev)) 1959 if (platform->is_railgated(dev))
@@ -1963,7 +1963,7 @@ int __gk20a_do_idle(struct device *dev, bool force_reset)
1963 * release railgate_lock, prevent suspend by incrementing usage counter, 1963 * release railgate_lock, prevent suspend by incrementing usage counter,
1964 * re-acquire railgate_lock 1964 * re-acquire railgate_lock
1965 */ 1965 */
1966 mutex_unlock(&platform->railgate_lock); 1966 nvgpu_mutex_release(&platform->railgate_lock);
1967 pm_runtime_get_sync(dev); 1967 pm_runtime_get_sync(dev);
1968 1968
1969 /* 1969 /*
@@ -1975,7 +1975,7 @@ int __gk20a_do_idle(struct device *dev, bool force_reset)
1975 target_ref_cnt = 2; 1975 target_ref_cnt = 2;
1976 else 1976 else
1977 target_ref_cnt = 1; 1977 target_ref_cnt = 1;
1978 mutex_lock(&platform->railgate_lock); 1978 nvgpu_mutex_acquire(&platform->railgate_lock);
1979 1979
1980 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, 1980 nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
1981 NVGPU_TIMER_CPU_TIMER); 1981 NVGPU_TIMER_CPU_TIMER);
@@ -2052,7 +2052,7 @@ int __gk20a_do_idle(struct device *dev, bool force_reset)
2052fail_drop_usage_count: 2052fail_drop_usage_count:
2053 pm_runtime_put_noidle(dev); 2053 pm_runtime_put_noidle(dev);
2054fail_timeout: 2054fail_timeout:
2055 mutex_unlock(&platform->railgate_lock); 2055 nvgpu_mutex_release(&platform->railgate_lock);
2056 up_write(&g->busy_lock); 2056 up_write(&g->busy_lock);
2057 return -EBUSY; 2057 return -EBUSY;
2058} 2058}
@@ -2101,7 +2101,7 @@ int __gk20a_do_unidle(struct device *dev)
2101 } 2101 }
2102 2102
2103 /* release the lock and open up all other busy() calls */ 2103 /* release the lock and open up all other busy() calls */
2104 mutex_unlock(&platform->railgate_lock); 2104 nvgpu_mutex_release(&platform->railgate_lock);
2105 up_write(&g->busy_lock); 2105 up_write(&g->busy_lock);
2106 2106
2107 return 0; 2107 return 0;
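For gk20a.c the spinlock side of the API follows the same shape: spin_lock_init()/spin_lock()/spin_unlock() become nvgpu_spinlock_init()/nvgpu_spinlock_acquire()/nvgpu_spinlock_release(), and mc_enable_lock still keeps the read-modify-write of mc_enable_r() atomic in gk20a_enable()/gk20a_disable(). A sketch against a hypothetical shadow variable rather than the real register accessors:

#include <linux/types.h>
#include <nvgpu/lock.h>

/* Hypothetical shadow of an enable register, guarded the way
 * mc_enable_lock guards mc_enable_r() above. */
struct reg_block {
	struct nvgpu_spinlock enable_lock;
	u32 enable_shadow;
};

static void reg_block_init(struct reg_block *rb)
{
	nvgpu_spinlock_init(&rb->enable_lock);	/* was spin_lock_init() */
	rb->enable_shadow = 0;
}

/* Keep the read-modify-write atomic against concurrent enable/disable
 * callers, as gk20a_enable()/gk20a_disable() do above. */
static void reg_block_set_units(struct reg_block *rb, u32 units, bool on)
{
	nvgpu_spinlock_acquire(&rb->enable_lock);	/* was spin_lock() */
	if (on)
		rb->enable_shadow |= units;
	else
		rb->enable_shadow &= ~units;
	nvgpu_spinlock_release(&rb->enable_lock);	/* was spin_unlock() */
}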
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 31b02378..acc3b975 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -29,7 +29,7 @@ struct gk20a_ctxsw_trace;
29struct acr_desc; 29struct acr_desc;
30 30
31#include <linux/sched.h> 31#include <linux/sched.h>
32#include <linux/spinlock.h> 32#include <nvgpu/lock.h>
33#include <linux/nvgpu.h> 33#include <linux/nvgpu.h>
34#include <linux/irqreturn.h> 34#include <linux/irqreturn.h>
35#include <soc/tegra/chip-id.h> 35#include <soc/tegra/chip-id.h>
@@ -871,9 +871,9 @@ struct gk20a {
871 bool timeouts_enabled; 871 bool timeouts_enabled;
872#endif 872#endif
873 873
874 struct mutex ch_wdt_lock; 874 struct nvgpu_mutex ch_wdt_lock;
875 875
876 struct mutex poweroff_lock; 876 struct nvgpu_mutex poweroff_lock;
877 877
878 /* Channel priorities */ 878 /* Channel priorities */
879 u32 timeslice_low_priority_us; 879 u32 timeslice_low_priority_us;
@@ -900,7 +900,7 @@ struct gk20a {
900 u32 emc3d_ratio; 900 u32 emc3d_ratio;
901 901
902#ifdef CONFIG_DEBUG_FS 902#ifdef CONFIG_DEBUG_FS
903 spinlock_t debugfs_lock; 903 struct nvgpu_spinlock debugfs_lock;
904 struct dentry *debugfs_ltc_enabled; 904 struct dentry *debugfs_ltc_enabled;
905 struct dentry *debugfs_timeouts_enabled; 905 struct dentry *debugfs_timeouts_enabled;
906 struct dentry *debugfs_gr_idle_timeout_default; 906 struct dentry *debugfs_gr_idle_timeout_default;
@@ -924,11 +924,11 @@ struct gk20a {
924 924
925 /* List of pending SW semaphore waits. */ 925 /* List of pending SW semaphore waits. */
926 struct list_head pending_sema_waits; 926 struct list_head pending_sema_waits;
927 raw_spinlock_t pending_sema_waits_lock; 927 struct nvgpu_raw_spinlock pending_sema_waits_lock;
928 928
929 /* held while manipulating # of debug/profiler sessions present */ 929 /* held while manipulating # of debug/profiler sessions present */
930 /* also prevents debug sessions from attaching until released */ 930 /* also prevents debug sessions from attaching until released */
931 struct mutex dbg_sessions_lock; 931 struct nvgpu_mutex dbg_sessions_lock;
932 int dbg_powergating_disabled_refcount; /*refcount for pg disable */ 932 int dbg_powergating_disabled_refcount; /*refcount for pg disable */
933 int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ 933 int dbg_timeout_disabled_refcount; /*refcount for timeout disable */
934 934
@@ -942,7 +942,7 @@ struct gk20a {
942 u64 pg_ungating_time_us; 942 u64 pg_ungating_time_us;
943 u32 pg_gating_cnt; 943 u32 pg_gating_cnt;
944 944
945 spinlock_t mc_enable_lock; 945 struct nvgpu_spinlock mc_enable_lock;
946 946
947 struct nvgpu_gpu_characteristics gpu_characteristics; 947 struct nvgpu_gpu_characteristics gpu_characteristics;
948 948
@@ -983,7 +983,7 @@ struct gk20a {
983 struct device *node; 983 struct device *node;
984 } sched; 984 } sched;
985 985
986 struct mutex client_lock; 986 struct nvgpu_mutex client_lock;
987 int client_refcount; /* open channels and ctrl nodes */ 987 int client_refcount; /* open channels and ctrl nodes */
988 988
989 dev_t cdev_region; 989 dev_t cdev_region;
@@ -1289,11 +1289,11 @@ static inline u32 get_field(u32 reg, u32 mask)
1289/* invalidate channel lookup tlb */ 1289/* invalidate channel lookup tlb */
1290static inline void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr) 1290static inline void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr)
1291{ 1291{
1292 spin_lock(&gr->ch_tlb_lock); 1292 nvgpu_spinlock_acquire(&gr->ch_tlb_lock);
1293 memset(gr->chid_tlb, 0, 1293 memset(gr->chid_tlb, 0,
1294 sizeof(struct gr_channel_map_tlb_entry) * 1294 sizeof(struct gr_channel_map_tlb_entry) *
1295 GR_CHANNEL_MAP_TLB_SIZE); 1295 GR_CHANNEL_MAP_TLB_SIZE);
1296 spin_unlock(&gr->ch_tlb_lock); 1296 nvgpu_spinlock_release(&gr->ch_tlb_lock);
1297} 1297}
1298 1298
1299/* classes that the device supports */ 1299/* classes that the device supports */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index d3b91a50..aad6c07b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -538,7 +538,7 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
538 struct gr_gk20a *gr = &g->gr; 538 struct gr_gk20a *gr = &g->gr;
539 int ret; 539 int ret;
540 540
541 mutex_lock(&gr->fecs_mutex); 541 nvgpu_mutex_acquire(&gr->fecs_mutex);
542 542
543 if (op.mailbox.id != 0) 543 if (op.mailbox.id != 0)
544 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id), 544 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
@@ -561,7 +561,7 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
561 op.cond.fail, op.mailbox.fail, 561 op.cond.fail, op.mailbox.fail,
562 sleepduringwait); 562 sleepduringwait);
563 563
564 mutex_unlock(&gr->fecs_mutex); 564 nvgpu_mutex_release(&gr->fecs_mutex);
565 565
566 return ret; 566 return ret;
567} 567}
@@ -573,7 +573,7 @@ int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
573 struct gr_gk20a *gr = &g->gr; 573 struct gr_gk20a *gr = &g->gr;
574 int ret; 574 int ret;
575 575
576 mutex_lock(&gr->fecs_mutex); 576 nvgpu_mutex_acquire(&gr->fecs_mutex);
577 577
578 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(op.mailbox.id), 578 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(op.mailbox.id),
579 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr)); 579 gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
@@ -587,7 +587,7 @@ int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
587 op.cond.fail, op.mailbox.fail, 587 op.cond.fail, op.mailbox.fail,
588 false); 588 false);
589 589
590 mutex_unlock(&gr->fecs_mutex); 590 nvgpu_mutex_release(&gr->fecs_mutex);
591 591
592 return ret; 592 return ret;
593} 593}
@@ -1596,7 +1596,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1596 /* golden ctx is global to all channels. Although only the first 1596 /* golden ctx is global to all channels. Although only the first
1597 channel initializes golden image, driver needs to prevent multiple 1597 channel initializes golden image, driver needs to prevent multiple
1598 channels from initializing golden ctx at the same time */ 1598 channels from initializing golden ctx at the same time */
1599 mutex_lock(&gr->ctx_mutex); 1599 nvgpu_mutex_acquire(&gr->ctx_mutex);
1600 1600
1601 if (gr->ctx_vars.golden_image_initialized) { 1601 if (gr->ctx_vars.golden_image_initialized) {
1602 goto clean_up; 1602 goto clean_up;
@@ -1825,7 +1825,7 @@ clean_up:
1825 gk20a_mem_end(g, gold_mem); 1825 gk20a_mem_end(g, gold_mem);
1826 gk20a_mem_end(g, gr_mem); 1826 gk20a_mem_end(g, gr_mem);
1827 1827
1828 mutex_unlock(&gr->ctx_mutex); 1828 nvgpu_mutex_release(&gr->ctx_mutex);
1829 return err; 1829 return err;
1830} 1830}
1831 1831
@@ -3327,7 +3327,7 @@ out:
3327int gk20a_comptag_allocator_init(struct gk20a_comptag_allocator *allocator, 3327int gk20a_comptag_allocator_init(struct gk20a_comptag_allocator *allocator,
3328 unsigned long size) 3328 unsigned long size)
3329{ 3329{
3330 mutex_init(&allocator->lock); 3330 nvgpu_mutex_init(&allocator->lock);
3331 /* 3331 /*
3332 * 0th comptag is special and is never used. The base for this bitmap 3332 * 0th comptag is special and is never used. The base for this bitmap
3333 * is 1, and its size is one less than the size of comptag store. 3333 * is 1, and its size is one less than the size of comptag store.
@@ -4064,7 +4064,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
4064 4064
4065 /* no endian swap ? */ 4065 /* no endian swap ? */
4066 4066
4067 mutex_lock(&gr->zbc_lock); 4067 nvgpu_mutex_acquire(&gr->zbc_lock);
4068 switch (zbc_val->type) { 4068 switch (zbc_val->type) {
4069 case GK20A_ZBC_TYPE_COLOR: 4069 case GK20A_ZBC_TYPE_COLOR:
4070 /* search existing tables */ 4070 /* search existing tables */
@@ -4159,7 +4159,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
4159 } 4159 }
4160 4160
4161err_mutex: 4161err_mutex:
4162 mutex_unlock(&gr->zbc_lock); 4162 nvgpu_mutex_release(&gr->zbc_lock);
4163 return ret; 4163 return ret;
4164} 4164}
4165 4165
@@ -4267,7 +4267,7 @@ int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
4267 struct zbc_entry zbc_val; 4267 struct zbc_entry zbc_val;
4268 u32 i, err; 4268 u32 i, err;
4269 4269
4270 mutex_init(&gr->zbc_lock); 4270 nvgpu_mutex_init(&gr->zbc_lock);
4271 4271
4272 /* load default color table */ 4272 /* load default color table */
4273 zbc_val.type = GK20A_ZBC_TYPE_COLOR; 4273 zbc_val.type = GK20A_ZBC_TYPE_COLOR;
@@ -5136,7 +5136,7 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
5136 gr->g = g; 5136 gr->g = g;
5137 5137
5138#if defined(CONFIG_GK20A_CYCLE_STATS) 5138#if defined(CONFIG_GK20A_CYCLE_STATS)
5139 mutex_init(&g->gr.cs_lock); 5139 nvgpu_mutex_init(&g->gr.cs_lock);
5140#endif 5140#endif
5141 5141
5142 err = gr_gk20a_init_gr_config(g, gr); 5142 err = gr_gk20a_init_gr_config(g, gr);
@@ -5172,8 +5172,8 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
5172 5172
5173 gr_gk20a_load_zbc_default_table(g, gr); 5173 gr_gk20a_load_zbc_default_table(g, gr);
5174 5174
5175 mutex_init(&gr->ctx_mutex); 5175 nvgpu_mutex_init(&gr->ctx_mutex);
5176 spin_lock_init(&gr->ch_tlb_lock); 5176 nvgpu_spinlock_init(&gr->ch_tlb_lock);
5177 5177
5178 gr->remove_support = gk20a_remove_gr_support; 5178 gr->remove_support = gk20a_remove_gr_support;
5179 gr->sw_ready = true; 5179 gr->sw_ready = true;
@@ -5244,7 +5244,7 @@ int gk20a_init_gr_support(struct gk20a *g)
5244 gk20a_dbg_fn(""); 5244 gk20a_dbg_fn("");
5245 5245
5246 /* this is required before gr_gk20a_init_ctx_state */ 5246 /* this is required before gr_gk20a_init_ctx_state */
5247 mutex_init(&g->gr.fecs_mutex); 5247 nvgpu_mutex_init(&g->gr.fecs_mutex);
5248 5248
5249 err = gr_gk20a_init_ctxsw(g); 5249 err = gr_gk20a_init_ctxsw(g);
5250 if (err) 5250 if (err)
@@ -5468,7 +5468,7 @@ int gk20a_gr_reset(struct gk20a *g)
5468 int err; 5468 int err;
5469 u32 size; 5469 u32 size;
5470 5470
5471 mutex_lock(&g->gr.fecs_mutex); 5471 nvgpu_mutex_acquire(&g->gr.fecs_mutex);
5472 5472
5473 err = gk20a_enable_gr_hw(g); 5473 err = gk20a_enable_gr_hw(g);
5474 if (err) 5474 if (err)
@@ -5482,7 +5482,7 @@ int gk20a_gr_reset(struct gk20a *g)
5482 if (err) 5482 if (err)
5483 return err; 5483 return err;
5484 5484
5485 mutex_unlock(&g->gr.fecs_mutex); 5485 nvgpu_mutex_release(&g->gr.fecs_mutex);
5486 5486
5487 /* this appears query for sw states but fecs actually init 5487 /* this appears query for sw states but fecs actually init
5488 ramchain, etc so this is hw init */ 5488 ramchain, etc so this is hw init */
@@ -5731,7 +5731,7 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g,
5731	if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0)) 5731	if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
5732	return 0; 5732	return 0;
5733 5733
5734 mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex); 5734 nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
5735 5735
5736 virtual_address = ch->cyclestate.cyclestate_buffer; 5736 virtual_address = ch->cyclestate.cyclestate_buffer;
5737 buffer_size = ch->cyclestate.cyclestate_buffer_size; 5737 buffer_size = ch->cyclestate.cyclestate_buffer_size;
@@ -5843,7 +5843,7 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g,
5843 sh_hdr->completed = true; 5843 sh_hdr->completed = true;
5844 offset += sh_hdr->size; 5844 offset += sh_hdr->size;
5845 } 5845 }
5846 mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex); 5846 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
5847#endif 5847#endif
5848 gk20a_dbg_fn(""); 5848 gk20a_dbg_fn("");
5849 wake_up(&ch->notifier_wq); 5849 wake_up(&ch->notifier_wq);
@@ -5874,7 +5874,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5874 if (!gr_fecs_current_ctx_valid_v(curr_ctx)) 5874 if (!gr_fecs_current_ctx_valid_v(curr_ctx))
5875 return NULL; 5875 return NULL;
5876 5876
5877 spin_lock(&gr->ch_tlb_lock); 5877 nvgpu_spinlock_acquire(&gr->ch_tlb_lock);
5878 5878
5879 /* check cache first */ 5879 /* check cache first */
5880 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { 5880 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
@@ -5926,7 +5926,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5926 (GR_CHANNEL_MAP_TLB_SIZE - 1); 5926 (GR_CHANNEL_MAP_TLB_SIZE - 1);
5927 5927
5928unlock: 5928unlock:
5929 spin_unlock(&gr->ch_tlb_lock); 5929 nvgpu_spinlock_release(&gr->ch_tlb_lock);
5930 if (curr_tsgid) 5930 if (curr_tsgid)
5931 *curr_tsgid = tsgid; 5931 *curr_tsgid = tsgid;
5932 return ret; 5932 return ret;
@@ -5998,7 +5998,7 @@ static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
5998 GPU_LIT_TPC_IN_GPC_STRIDE); 5998 GPU_LIT_TPC_IN_GPC_STRIDE);
5999 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; 5999 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
6000 6000
6001 mutex_lock(&g->dbg_sessions_lock); 6001 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
6002 6002
6003 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, 6003 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
6004 gr_gpc0_tpc0_sm_cfg_r() + offset)); 6004 gr_gpc0_tpc0_sm_cfg_r() + offset));
@@ -6012,7 +6012,7 @@ static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
6012 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, 6012 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
6013 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset); 6013 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset);
6014 6014
6015 mutex_unlock(&g->dbg_sessions_lock); 6015 nvgpu_mutex_release(&g->dbg_sessions_lock);
6016 6016
6017 return 0; 6017 return 0;
6018} 6018}
@@ -6029,7 +6029,7 @@ static int gk20a_gr_update_sm_error_state(struct gk20a *g,
6029 GPU_LIT_TPC_IN_GPC_STRIDE); 6029 GPU_LIT_TPC_IN_GPC_STRIDE);
6030 int err = 0; 6030 int err = 0;
6031 6031
6032 mutex_lock(&g->dbg_sessions_lock); 6032 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
6033 6033
6034 gr->sm_error_states[sm_id].hww_global_esr = 6034 gr->sm_error_states[sm_id].hww_global_esr =
6035 sm_error_state->hww_global_esr; 6035 sm_error_state->hww_global_esr;
@@ -6081,7 +6081,7 @@ enable_ctxsw:
6081 err = gr_gk20a_enable_ctxsw(g); 6081 err = gr_gk20a_enable_ctxsw(g);
6082 6082
6083fail: 6083fail:
6084 mutex_unlock(&g->dbg_sessions_lock); 6084 nvgpu_mutex_release(&g->dbg_sessions_lock);
6085 return err; 6085 return err;
6086} 6086}
6087 6087
@@ -6096,7 +6096,7 @@ static int gk20a_gr_clear_sm_error_state(struct gk20a *g,
6096 GPU_LIT_TPC_IN_GPC_STRIDE); 6096 GPU_LIT_TPC_IN_GPC_STRIDE);
6097 int err = 0; 6097 int err = 0;
6098 6098
6099 mutex_lock(&g->dbg_sessions_lock); 6099 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
6100 6100
6101 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); 6101 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
6102 6102
@@ -6122,7 +6122,7 @@ static int gk20a_gr_clear_sm_error_state(struct gk20a *g,
6122 err = gr_gk20a_enable_ctxsw(g); 6122 err = gr_gk20a_enable_ctxsw(g);
6123 6123
6124fail: 6124fail:
6125 mutex_unlock(&g->dbg_sessions_lock); 6125 nvgpu_mutex_release(&g->dbg_sessions_lock);
6126 return err; 6126 return err;
6127} 6127}
6128 6128
@@ -9128,7 +9128,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
9128 struct dbg_session_channel_data *ch_data; 9128 struct dbg_session_channel_data *ch_data;
9129 int err = 0; 9129 int err = 0;
9130 9130
9131 mutex_lock(&g->dbg_sessions_lock); 9131 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
9132 9132
9133 err = gr_gk20a_disable_ctxsw(g); 9133 err = gr_gk20a_disable_ctxsw(g);
9134 if (err) { 9134 if (err) {
@@ -9136,7 +9136,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
9136 goto clean_up; 9136 goto clean_up;
9137 } 9137 }
9138 9138
9139 mutex_lock(&dbg_s->ch_list_lock); 9139 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
9140 9140
9141 list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) { 9141 list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) {
9142 ch = g->fifo.channel + ch_data->chid; 9142 ch = g->fifo.channel + ch_data->chid;
@@ -9146,7 +9146,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
9146 local_ctx_resident_ch_fd = ch_data->channel_fd; 9146 local_ctx_resident_ch_fd = ch_data->channel_fd;
9147 } 9147 }
9148 9148
9149 mutex_unlock(&dbg_s->ch_list_lock); 9149 nvgpu_mutex_release(&dbg_s->ch_list_lock);
9150 9150
9151 err = gr_gk20a_enable_ctxsw(g); 9151 err = gr_gk20a_enable_ctxsw(g);
9152 if (err) 9152 if (err)
@@ -9155,7 +9155,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
9155 *ctx_resident_ch_fd = local_ctx_resident_ch_fd; 9155 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
9156 9156
9157clean_up: 9157clean_up:
9158 mutex_unlock(&g->dbg_sessions_lock); 9158 nvgpu_mutex_release(&g->dbg_sessions_lock);
9159 9159
9160 return err; 9160 return err;
9161} 9161}
@@ -9170,7 +9170,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
9170 int err = 0; 9170 int err = 0;
9171 struct dbg_session_channel_data *ch_data; 9171 struct dbg_session_channel_data *ch_data;
9172 9172
9173 mutex_lock(&g->dbg_sessions_lock); 9173 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
9174 9174
9175 err = gr_gk20a_disable_ctxsw(g); 9175 err = gr_gk20a_disable_ctxsw(g);
9176 if (err) { 9176 if (err) {
@@ -9193,7 +9193,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
9193 *ctx_resident_ch_fd = local_ctx_resident_ch_fd; 9193 *ctx_resident_ch_fd = local_ctx_resident_ch_fd;
9194 9194
9195clean_up: 9195clean_up:
9196 mutex_unlock(&g->dbg_sessions_lock); 9196 nvgpu_mutex_release(&g->dbg_sessions_lock);
9197 9197
9198 return err; 9198 return err;
9199} 9199}
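The gr_gk20a.c hunks above swap mutex_lock()/mutex_unlock() for the nvgpu wrappers one-for-one, leaving the critical sections themselves unchanged. Below is a minimal sketch of the resulting pattern, assuming the wrappers keep the sleeping-lock semantics of the primitives they replace; struct gr_example and do_fecs_method_locked() are hypothetical stand-ins for the gr_gk20a state and the real FECS programming, and the sketch only builds in-tree where <nvgpu/lock.h> is available.

	#include <nvgpu/lock.h>

	struct gr_example {				/* hypothetical stand-in */
		struct nvgpu_mutex fecs_mutex;		/* protect fecs method, as in gr_gk20a */
	};

	static int do_fecs_method_locked(struct gr_example *gr)
	{
		/* hardware programming elided in this sketch */
		return 0;
	}

	static int do_fecs_method(struct gr_example *gr)
	{
		int ret;

		nvgpu_mutex_acquire(&gr->fecs_mutex);
		ret = do_fecs_method_locked(gr);
		nvgpu_mutex_release(&gr->fecs_mutex);

		return ret;
	}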
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 5a987a82..2dd1eaf5 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -275,8 +275,8 @@ struct gr_gk20a {
275 bool valid; 275 bool valid;
276 } ctx_vars; 276 } ctx_vars;
277 277
278 struct mutex ctx_mutex; /* protect golden ctx init */ 278 struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
279 struct mutex fecs_mutex; /* protect fecs method */ 279 struct nvgpu_mutex fecs_mutex; /* protect fecs method */
280 280
281#define GR_NETLIST_DYNAMIC -1 281#define GR_NETLIST_DYNAMIC -1
282#define GR_NETLIST_STATIC_A 'A' 282#define GR_NETLIST_STATIC_A 'A'
@@ -333,7 +333,7 @@ struct gr_gk20a {
333 u32 max_comptag_mem; /* max memory size (MB) for comptag */ 333 u32 max_comptag_mem; /* max memory size (MB) for comptag */
334 struct compbit_store_desc compbit_store; 334 struct compbit_store_desc compbit_store;
335 struct gk20a_comptag_allocator { 335 struct gk20a_comptag_allocator {
336 struct mutex lock; 336 struct nvgpu_mutex lock;
337 /* this bitmap starts at ctag 1. 0th cannot be taken */ 337 /* this bitmap starts at ctag 1. 0th cannot be taken */
338 unsigned long *bitmap; 338 unsigned long *bitmap;
339 /* size of bitmap, not max ctags, so one less */ 339 /* size of bitmap, not max ctags, so one less */
@@ -342,7 +342,7 @@ struct gr_gk20a {
342 342
343 struct gr_zcull_gk20a zcull; 343 struct gr_zcull_gk20a zcull;
344 344
345 struct mutex zbc_lock; 345 struct nvgpu_mutex zbc_lock;
346 struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE]; 346 struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
347 struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE]; 347 struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
348#ifdef CONFIG_TEGRA_19x_GPU 348#ifdef CONFIG_TEGRA_19x_GPU
@@ -363,7 +363,7 @@ struct gr_gk20a {
363#define GR_CHANNEL_MAP_TLB_SIZE 2 /* must of power of 2 */ 363#define GR_CHANNEL_MAP_TLB_SIZE 2 /* must of power of 2 */
364 struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE]; 364 struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
365 u32 channel_tlb_flush_index; 365 u32 channel_tlb_flush_index;
366 spinlock_t ch_tlb_lock; 366 struct nvgpu_spinlock ch_tlb_lock;
367 367
368 void (*remove_support)(struct gr_gk20a *gr); 368 void (*remove_support)(struct gr_gk20a *gr);
369 bool sw_ready; 369 bool sw_ready;
@@ -379,7 +379,7 @@ struct gr_gk20a {
379 struct sm_info *sm_to_cluster; 379 struct sm_info *sm_to_cluster;
380 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states; 380 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
381#if defined(CONFIG_GK20A_CYCLE_STATS) 381#if defined(CONFIG_GK20A_CYCLE_STATS)
382 struct mutex cs_lock; 382 struct nvgpu_mutex cs_lock;
383 struct gk20a_cs_snapshot *cs_data; 383 struct gk20a_cs_snapshot *cs_data;
384#endif 384#endif
385}; 385};
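In gr_gk20a.h the lock fields themselves change type: struct mutex becomes struct nvgpu_mutex and spinlock_t becomes struct nvgpu_spinlock, both embedded by value. A short sketch of declaring and initializing such fields, mirroring the init calls visible in the converted gk20a_init_gr_setup_sw(); struct gr_locks_example and gr_locks_example_init() are hypothetical names used only for illustration.

	#include <nvgpu/lock.h>

	struct gr_locks_example {			/* hypothetical stand-in */
		struct nvgpu_mutex ctx_mutex;		/* protect golden ctx init */
		struct nvgpu_spinlock ch_tlb_lock;	/* protect the channel TLB cache */
	};

	static void gr_locks_example_init(struct gr_locks_example *gr)
	{
		/* same init pattern as the converted gk20a_init_gr_setup_sw() */
		nvgpu_mutex_init(&gr->ctx_mutex);
		nvgpu_spinlock_init(&gr->ch_tlb_lock);
	}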
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
index e1c23f79..13819872 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_common.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Graphics 4 * GK20A Graphics
5 * 5 *
6 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -138,7 +138,7 @@ static void gk20a_ltc_sync_debugfs(struct gk20a *g)
138{ 138{
139 u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); 139 u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
140 140
141 spin_lock(&g->debugfs_lock); 141 nvgpu_spinlock_acquire(&g->debugfs_lock);
142 if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) { 142 if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
143 u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); 143 u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
144 if (g->mm.ltc_enabled_debug) 144 if (g->mm.ltc_enabled_debug)
@@ -151,6 +151,6 @@ static void gk20a_ltc_sync_debugfs(struct gk20a *g)
151 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); 151 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
152 g->mm.ltc_enabled = g->mm.ltc_enabled_debug; 152 g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
153 } 153 }
154 spin_unlock(&g->debugfs_lock); 154 nvgpu_spinlock_release(&g->debugfs_lock);
155} 155}
156#endif 156#endif
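ltc_common.c converts a spinlock user: spin_lock()/spin_unlock() on debugfs_lock become nvgpu_spinlock_acquire()/nvgpu_spinlock_release(). A minimal sketch of that usage, assuming the wrapper keeps the non-sleeping semantics of spin_lock(); struct bypass_example and bypass_example_sync() are hypothetical and only echo the shape of gk20a_ltc_sync_debugfs().

	#include <linux/types.h>
	#include <nvgpu/lock.h>

	struct bypass_example {				/* hypothetical stand-in */
		struct nvgpu_spinlock lock;
		bool enabled;
		bool enabled_debug;
	};

	static void bypass_example_sync(struct bypass_example *b)
	{
		/* keep the section short and non-sleeping, as with spin_lock() */
		nvgpu_spinlock_acquire(&b->lock);
		if (b->enabled != b->enabled_debug)
			b->enabled = b->enabled_debug;
		nvgpu_spinlock_release(&b->lock);
	}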
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index 103952ca..5db48ae7 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A L2 2 * GK20A L2
3 * 3 *
4 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -120,7 +120,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
120 if (gr->compbit_store.mem.size == 0) 120 if (gr->compbit_store.mem.size == 0)
121 return 0; 121 return 0;
122 122
123 mutex_lock(&g->mm.l2_op_lock); 123 nvgpu_mutex_acquire(&g->mm.l2_op_lock);
124 124
125 if (op == gk20a_cbc_op_clear) { 125 if (op == gk20a_cbc_op_clear) {
126 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), 126 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
@@ -168,7 +168,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
168 } 168 }
169out: 169out:
170 trace_gk20a_ltc_cbc_ctrl_done(dev_name(g->dev)); 170 trace_gk20a_ltc_cbc_ctrl_done(dev_name(g->dev));
171 mutex_unlock(&g->mm.l2_op_lock); 171 nvgpu_mutex_release(&g->mm.l2_op_lock);
172 return err; 172 return err;
173} 173}
174 174
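gk20a_ltc_cbc_ctrl() shows the error-path discipline the conversion has to preserve: every exit after nvgpu_mutex_acquire() must pass through the matching nvgpu_mutex_release(). A sketch of the single-exit goto pattern, with struct l2_example and l2_op_example() as hypothetical stand-ins for the mm state and the real L2 operation.

	#include <linux/errno.h>
	#include <linux/types.h>
	#include <nvgpu/lock.h>

	struct l2_example {				/* hypothetical stand-in */
		struct nvgpu_mutex l2_op_lock;
	};

	static int l2_op_example(struct l2_example *mm, bool fail_early)
	{
		int err = 0;

		nvgpu_mutex_acquire(&mm->l2_op_lock);
		if (fail_early) {
			err = -EINVAL;
			goto out;		/* single exit keeps acquire/release balanced */
		}
		/* L2 operation elided in this sketch */
	out:
		nvgpu_mutex_release(&mm->l2_op_lock);
		return err;
	}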
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index c62d1f6c..2539138a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -142,7 +142,7 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem,
142 142
143 WARN_ON(!bufbase); 143 WARN_ON(!bufbase);
144 144
145 spin_lock(&g->mm.pramin_window_lock); 145 nvgpu_spinlock_acquire(&g->mm.pramin_window_lock);
146 146
147 if (g->mm.pramin_window != win) { 147 if (g->mm.pramin_window != win) {
148 gk20a_writel(g, bus_bar0_window_r(), win); 148 gk20a_writel(g, bus_bar0_window_r(), win);
@@ -158,7 +158,7 @@ static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem,
158{ 158{
159 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk); 159 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk);
160 160
161 spin_unlock(&g->mm.pramin_window_lock); 161 nvgpu_spinlock_release(&g->mm.pramin_window_lock);
162} 162}
163 163
164/* 164/*
@@ -483,7 +483,7 @@ static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
483static struct gk20a *gk20a_vidmem_buf_owner(struct dma_buf *dmabuf); 483static struct gk20a *gk20a_vidmem_buf_owner(struct dma_buf *dmabuf);
484 484
485struct gk20a_dmabuf_priv { 485struct gk20a_dmabuf_priv {
486 struct mutex lock; 486 struct nvgpu_mutex lock;
487 487
488 struct gk20a_comptag_allocator *comptag_allocator; 488 struct gk20a_comptag_allocator *comptag_allocator;
489 struct gk20a_comptags comptags; 489 struct gk20a_comptags comptags;
@@ -514,7 +514,7 @@ static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
514 unsigned long addr; 514 unsigned long addr;
515 int err = 0; 515 int err = 0;
516 516
517 mutex_lock(&allocator->lock); 517 nvgpu_mutex_acquire(&allocator->lock);
518 addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size, 518 addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size,
519 0, len, 0); 519 0, len, 0);
520 if (addr < allocator->size) { 520 if (addr < allocator->size) {
@@ -524,7 +524,7 @@ static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
524 } else { 524 } else {
525 err = -ENOMEM; 525 err = -ENOMEM;
526 } 526 }
527 mutex_unlock(&allocator->lock); 527 nvgpu_mutex_release(&allocator->lock);
528 528
529 return err; 529 return err;
530} 530}
@@ -538,9 +538,9 @@ static void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
538 WARN_ON(addr > allocator->size); 538 WARN_ON(addr > allocator->size);
539 WARN_ON(addr + len > allocator->size); 539 WARN_ON(addr + len > allocator->size);
540 540
541 mutex_lock(&allocator->lock); 541 nvgpu_mutex_acquire(&allocator->lock);
542 bitmap_clear(allocator->bitmap, addr, len); 542 bitmap_clear(allocator->bitmap, addr, len);
543 mutex_unlock(&allocator->lock); 543 nvgpu_mutex_release(&allocator->lock);
544} 544}
545 545
546static void gk20a_mm_delete_priv(void *_priv) 546static void gk20a_mm_delete_priv(void *_priv)
@@ -575,12 +575,12 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
575 if (WARN_ON(!priv)) 575 if (WARN_ON(!priv))
576 return ERR_PTR(-EINVAL); 576 return ERR_PTR(-EINVAL);
577 577
578 mutex_lock(&priv->lock); 578 nvgpu_mutex_acquire(&priv->lock);
579 579
580 if (priv->pin_count == 0) { 580 if (priv->pin_count == 0) {
581 priv->attach = dma_buf_attach(dmabuf, dev); 581 priv->attach = dma_buf_attach(dmabuf, dev);
582 if (IS_ERR(priv->attach)) { 582 if (IS_ERR(priv->attach)) {
583 mutex_unlock(&priv->lock); 583 nvgpu_mutex_release(&priv->lock);
584 return (struct sg_table *)priv->attach; 584 return (struct sg_table *)priv->attach;
585 } 585 }
586 586
@@ -588,13 +588,13 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
588 DMA_BIDIRECTIONAL); 588 DMA_BIDIRECTIONAL);
589 if (IS_ERR(priv->sgt)) { 589 if (IS_ERR(priv->sgt)) {
590 dma_buf_detach(dmabuf, priv->attach); 590 dma_buf_detach(dmabuf, priv->attach);
591 mutex_unlock(&priv->lock); 591 nvgpu_mutex_release(&priv->lock);
592 return priv->sgt; 592 return priv->sgt;
593 } 593 }
594 } 594 }
595 595
596 priv->pin_count++; 596 priv->pin_count++;
597 mutex_unlock(&priv->lock); 597 nvgpu_mutex_release(&priv->lock);
598 return priv->sgt; 598 return priv->sgt;
599} 599}
600 600
@@ -607,7 +607,7 @@ void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
607 if (IS_ERR(priv) || !priv) 607 if (IS_ERR(priv) || !priv)
608 return; 608 return;
609 609
610 mutex_lock(&priv->lock); 610 nvgpu_mutex_acquire(&priv->lock);
611 WARN_ON(priv->sgt != sgt); 611 WARN_ON(priv->sgt != sgt);
612 priv->pin_count--; 612 priv->pin_count--;
613 WARN_ON(priv->pin_count < 0); 613 WARN_ON(priv->pin_count < 0);
@@ -617,7 +617,7 @@ void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
617 DMA_BIDIRECTIONAL); 617 DMA_BIDIRECTIONAL);
618 dma_buf_detach(dmabuf, priv->attach); 618 dma_buf_detach(dmabuf, priv->attach);
619 } 619 }
620 mutex_unlock(&priv->lock); 620 nvgpu_mutex_release(&priv->lock);
621} 621}
622 622
623void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, 623void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
@@ -842,7 +842,7 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
842static void gk20a_init_pramin(struct mm_gk20a *mm) 842static void gk20a_init_pramin(struct mm_gk20a *mm)
843{ 843{
844 mm->pramin_window = 0; 844 mm->pramin_window = 0;
845 spin_lock_init(&mm->pramin_window_lock); 845 nvgpu_spinlock_init(&mm->pramin_window_lock);
846 mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; 846 mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
847} 847}
848 848
@@ -971,12 +971,12 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
971 mm->vidmem.bootstrap_base = bootstrap_base; 971 mm->vidmem.bootstrap_base = bootstrap_base;
972 mm->vidmem.bootstrap_size = bootstrap_size; 972 mm->vidmem.bootstrap_size = bootstrap_size;
973 973
974 mutex_init(&mm->vidmem.first_clear_mutex); 974 nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
975 975
976 INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker); 976 INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker);
977 atomic64_set(&mm->vidmem.bytes_pending, 0); 977 atomic64_set(&mm->vidmem.bytes_pending, 0);
978 INIT_LIST_HEAD(&mm->vidmem.clear_list_head); 978 INIT_LIST_HEAD(&mm->vidmem.clear_list_head);
979 mutex_init(&mm->vidmem.clear_list_mutex); 979 nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
980 980
981 gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); 981 gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M);
982 982
@@ -998,7 +998,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
998 } 998 }
999 999
1000 mm->g = g; 1000 mm->g = g;
1001 mutex_init(&mm->l2_op_lock); 1001 nvgpu_mutex_init(&mm->l2_op_lock);
1002 1002
1003 /*TBD: make channel vm size configurable */ 1003 /*TBD: make channel vm size configurable */
1004 mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - 1004 mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
@@ -1484,12 +1484,12 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
1484 return 0; 1484 return 0;
1485 } 1485 }
1486 1486
1487 mutex_lock(&vm->update_gmmu_lock); 1487 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1488 1488
1489 buffer_list = nvgpu_kalloc(sizeof(*buffer_list) * 1489 buffer_list = nvgpu_kalloc(sizeof(*buffer_list) *
1490 vm->num_user_mapped_buffers, true); 1490 vm->num_user_mapped_buffers, true);
1491 if (!buffer_list) { 1491 if (!buffer_list) {
1492 mutex_unlock(&vm->update_gmmu_lock); 1492 nvgpu_mutex_release(&vm->update_gmmu_lock);
1493 return -ENOMEM; 1493 return -ENOMEM;
1494 } 1494 }
1495 1495
@@ -1510,7 +1510,7 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
1510 *num_buffers = vm->num_user_mapped_buffers; 1510 *num_buffers = vm->num_user_mapped_buffers;
1511 *mapped_buffers = buffer_list; 1511 *mapped_buffers = buffer_list;
1512 1512
1513 mutex_unlock(&vm->update_gmmu_lock); 1513 nvgpu_mutex_release(&vm->update_gmmu_lock);
1514 1514
1515 return 0; 1515 return 0;
1516} 1516}
@@ -1544,9 +1544,9 @@ void gk20a_vm_mapping_batch_finish_locked(
1544void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm, 1544void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm,
1545 struct vm_gk20a_mapping_batch *mapping_batch) 1545 struct vm_gk20a_mapping_batch *mapping_batch)
1546{ 1546{
1547 mutex_lock(&vm->update_gmmu_lock); 1547 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1548 gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch); 1548 gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch);
1549 mutex_unlock(&vm->update_gmmu_lock); 1549 nvgpu_mutex_release(&vm->update_gmmu_lock);
1550} 1550}
1551 1551
1552void gk20a_vm_put_buffers(struct vm_gk20a *vm, 1552void gk20a_vm_put_buffers(struct vm_gk20a *vm,
@@ -1559,7 +1559,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
1559 if (num_buffers == 0) 1559 if (num_buffers == 0)
1560 return; 1560 return;
1561 1561
1562 mutex_lock(&vm->update_gmmu_lock); 1562 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1563 gk20a_vm_mapping_batch_start(&batch); 1563 gk20a_vm_mapping_batch_start(&batch);
1564 vm->kref_put_batch = &batch; 1564 vm->kref_put_batch = &batch;
1565 1565
@@ -1569,7 +1569,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
1569 1569
1570 vm->kref_put_batch = NULL; 1570 vm->kref_put_batch = NULL;
1571 gk20a_vm_mapping_batch_finish_locked(vm, &batch); 1571 gk20a_vm_mapping_batch_finish_locked(vm, &batch);
1572 mutex_unlock(&vm->update_gmmu_lock); 1572 nvgpu_mutex_release(&vm->update_gmmu_lock);
1573 1573
1574 nvgpu_kfree(mapped_buffers); 1574 nvgpu_kfree(mapped_buffers);
1575} 1575}
@@ -1581,17 +1581,17 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
1581 int retries = 10000; /* 50 ms */ 1581 int retries = 10000; /* 50 ms */
1582 struct mapped_buffer_node *mapped_buffer; 1582 struct mapped_buffer_node *mapped_buffer;
1583 1583
1584 mutex_lock(&vm->update_gmmu_lock); 1584 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1585 1585
1586 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); 1586 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
1587 if (!mapped_buffer) { 1587 if (!mapped_buffer) {
1588 mutex_unlock(&vm->update_gmmu_lock); 1588 nvgpu_mutex_release(&vm->update_gmmu_lock);
1589 gk20a_err(d, "invalid addr to unmap 0x%llx", offset); 1589 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
1590 return; 1590 return;
1591 } 1591 }
1592 1592
1593 if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { 1593 if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1594 mutex_unlock(&vm->update_gmmu_lock); 1594 nvgpu_mutex_release(&vm->update_gmmu_lock);
1595 1595
1596 while (retries >= 0 || !tegra_platform_is_silicon()) { 1596 while (retries >= 0 || !tegra_platform_is_silicon()) {
1597 if (atomic_read(&mapped_buffer->ref.refcount) == 1) 1597 if (atomic_read(&mapped_buffer->ref.refcount) == 1)
@@ -1602,11 +1602,11 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
1602 if (retries < 0 && tegra_platform_is_silicon()) 1602 if (retries < 0 && tegra_platform_is_silicon())
1603 gk20a_err(d, "sync-unmap failed on 0x%llx", 1603 gk20a_err(d, "sync-unmap failed on 0x%llx",
1604 offset); 1604 offset);
1605 mutex_lock(&vm->update_gmmu_lock); 1605 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1606 } 1606 }
1607 1607
1608 if (mapped_buffer->user_mapped == 0) { 1608 if (mapped_buffer->user_mapped == 0) {
1609 mutex_unlock(&vm->update_gmmu_lock); 1609 nvgpu_mutex_release(&vm->update_gmmu_lock);
1610 gk20a_err(d, "addr already unmapped from user 0x%llx", offset); 1610 gk20a_err(d, "addr already unmapped from user 0x%llx", offset);
1611 return; 1611 return;
1612 } 1612 }
@@ -1619,7 +1619,7 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
1619 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); 1619 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
1620 vm->kref_put_batch = NULL; 1620 vm->kref_put_batch = NULL;
1621 1621
1622 mutex_unlock(&vm->update_gmmu_lock); 1622 nvgpu_mutex_release(&vm->update_gmmu_lock);
1623} 1623}
1624 1624
1625u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, 1625u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
@@ -2239,7 +2239,7 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
2239 buf->g = g; 2239 buf->g = g;
2240 2240
2241 if (!g->mm.vidmem.cleared) { 2241 if (!g->mm.vidmem.cleared) {
2242 mutex_lock(&g->mm.vidmem.first_clear_mutex); 2242 nvgpu_mutex_acquire(&g->mm.vidmem.first_clear_mutex);
2243 if (!g->mm.vidmem.cleared) { 2243 if (!g->mm.vidmem.cleared) {
2244 err = gk20a_vidmem_clear_all(g); 2244 err = gk20a_vidmem_clear_all(g);
2245 if (err) { 2245 if (err) {
@@ -2248,7 +2248,7 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
2248 goto err_kfree; 2248 goto err_kfree;
2249 } 2249 }
2250 } 2250 }
2251 mutex_unlock(&g->mm.vidmem.first_clear_mutex); 2251 nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex);
2252 } 2252 }
2253 2253
2254 buf->mem = kzalloc(sizeof(struct mem_desc), GFP_KERNEL); 2254 buf->mem = kzalloc(sizeof(struct mem_desc), GFP_KERNEL);
@@ -2301,10 +2301,10 @@ int gk20a_vidmem_get_space(struct gk20a *g, u64 *space)
2301 if (!nvgpu_alloc_initialized(allocator)) 2301 if (!nvgpu_alloc_initialized(allocator))
2302 return -ENOSYS; 2302 return -ENOSYS;
2303 2303
2304 mutex_lock(&g->mm.vidmem.clear_list_mutex); 2304 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
2305 *space = nvgpu_alloc_space(allocator) + 2305 *space = nvgpu_alloc_space(allocator) +
2306 atomic64_read(&g->mm.vidmem.bytes_pending); 2306 atomic64_read(&g->mm.vidmem.bytes_pending);
2307 mutex_unlock(&g->mm.vidmem.clear_list_mutex); 2307 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
2308 return 0; 2308 return 0;
2309#else 2309#else
2310 return -ENOSYS; 2310 return -ENOSYS;
@@ -2425,7 +2425,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
2425 return -EFAULT; 2425 return -EFAULT;
2426 } 2426 }
2427 2427
2428 mutex_lock(&vm->update_gmmu_lock); 2428 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2429 2429
2430 /* check if this buffer is already mapped */ 2430 /* check if this buffer is already mapped */
2431 if (!vm->userspace_managed) { 2431 if (!vm->userspace_managed) {
@@ -2434,7 +2434,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
2434 flags, kind, sgt, 2434 flags, kind, sgt,
2435 user_mapped, rw_flag); 2435 user_mapped, rw_flag);
2436 if (map_offset) { 2436 if (map_offset) {
2437 mutex_unlock(&vm->update_gmmu_lock); 2437 nvgpu_mutex_release(&vm->update_gmmu_lock);
2438 return map_offset; 2438 return map_offset;
2439 } 2439 }
2440 } 2440 }
@@ -2627,7 +2627,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
2627 mapped_buffer->va_node = va_node; 2627 mapped_buffer->va_node = va_node;
2628 } 2628 }
2629 2629
2630 mutex_unlock(&vm->update_gmmu_lock); 2630 nvgpu_mutex_release(&vm->update_gmmu_lock);
2631 2631
2632 return map_offset; 2632 return map_offset;
2633 2633
@@ -2643,7 +2643,7 @@ clean_up:
2643 if (!IS_ERR(bfr.sgt)) 2643 if (!IS_ERR(bfr.sgt))
2644 gk20a_mm_unpin(d, dmabuf, bfr.sgt); 2644 gk20a_mm_unpin(d, dmabuf, bfr.sgt);
2645 2645
2646 mutex_unlock(&vm->update_gmmu_lock); 2646 nvgpu_mutex_release(&vm->update_gmmu_lock);
2647 gk20a_dbg_info("err=%d\n", err); 2647 gk20a_dbg_info("err=%d\n", err);
2648 return 0; 2648 return 0;
2649} 2649}
@@ -2658,13 +2658,13 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
2658 struct mapped_buffer_node *mapped_buffer; 2658 struct mapped_buffer_node *mapped_buffer;
2659 struct device *d = dev_from_vm(vm); 2659 struct device *d = dev_from_vm(vm);
2660 2660
2661 mutex_lock(&vm->update_gmmu_lock); 2661 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2662 2662
2663 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva); 2663 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
2664 2664
2665 if (!mapped_buffer || !mapped_buffer->user_mapped) 2665 if (!mapped_buffer || !mapped_buffer->user_mapped)
2666 { 2666 {
2667 mutex_unlock(&vm->update_gmmu_lock); 2667 nvgpu_mutex_release(&vm->update_gmmu_lock);
2668 gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva); 2668 gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
2669 return -EFAULT; 2669 return -EFAULT;
2670 } 2670 }
@@ -2685,7 +2685,7 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
2685 *mapping_ctagline = mapped_buffer->ctag_offset; 2685 *mapping_ctagline = mapped_buffer->ctag_offset;
2686 } 2686 }
2687 2687
2688 mutex_unlock(&vm->update_gmmu_lock); 2688 nvgpu_mutex_release(&vm->update_gmmu_lock);
2689 return 0; 2689 return 0;
2690} 2690}
2691 2691
@@ -2716,19 +2716,19 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
2716 return -EFAULT; 2716 return -EFAULT;
2717 } 2717 }
2718 2718
2719 mutex_lock(&vm->update_gmmu_lock); 2719 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2720 2720
2721 mapped_buffer = 2721 mapped_buffer =
2722 find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva); 2722 find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
2723 2723
2724 if (!mapped_buffer || !mapped_buffer->user_mapped) { 2724 if (!mapped_buffer || !mapped_buffer->user_mapped) {
2725 mutex_unlock(&vm->update_gmmu_lock); 2725 nvgpu_mutex_release(&vm->update_gmmu_lock);
2726 gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva); 2726 gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
2727 return -EFAULT; 2727 return -EFAULT;
2728 } 2728 }
2729 2729
2730 if (!mapped_buffer->ctags_mappable) { 2730 if (!mapped_buffer->ctags_mappable) {
2731 mutex_unlock(&vm->update_gmmu_lock); 2731 nvgpu_mutex_release(&vm->update_gmmu_lock);
2732 gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", 2732 gk20a_err(d, "%s: comptags not mappable, offset 0x%llx",
2733 __func__, mapping_gva); 2733 __func__, mapping_gva);
2734 return -EFAULT; 2734 return -EFAULT;
@@ -2747,7 +2747,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
2747 u64 cacheline_offset_start; 2747 u64 cacheline_offset_start;
2748 2748
2749 if (!mapped_buffer->ctag_map_win_size) { 2749 if (!mapped_buffer->ctag_map_win_size) {
2750 mutex_unlock(&vm->update_gmmu_lock); 2750 nvgpu_mutex_release(&vm->update_gmmu_lock);
2751 gk20a_err(d, 2751 gk20a_err(d,
2752 "%s: mapping 0x%llx does not have " 2752 "%s: mapping 0x%llx does not have "
2753 "mappable comptags", 2753 "mappable comptags",
@@ -2774,7 +2774,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
2774 mapped_buffer->ctag_map_win_size, &va_node); 2774 mapped_buffer->ctag_map_win_size, &va_node);
2775 2775
2776 if (err) { 2776 if (err) {
2777 mutex_unlock(&vm->update_gmmu_lock); 2777 nvgpu_mutex_release(&vm->update_gmmu_lock);
2778 return err; 2778 return err;
2779 } 2779 }
2780 2780
@@ -2783,7 +2783,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
2783 * pointer if the space is freed 2783 * pointer if the space is freed
2784 * before before the buffer is 2784 * before before the buffer is
2785 * unmapped */ 2785 * unmapped */
2786 mutex_unlock(&vm->update_gmmu_lock); 2786 nvgpu_mutex_release(&vm->update_gmmu_lock);
2787 gk20a_err(d, 2787 gk20a_err(d,
2788 "%s: comptags cannot be mapped into allocated space", 2788 "%s: comptags cannot be mapped into allocated space",
2789 __func__); 2789 __func__);
@@ -2810,7 +2810,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
2810 g->gr.compbit_store.mem.aperture); 2810 g->gr.compbit_store.mem.aperture);
2811 2811
2812 if (!mapped_buffer->ctag_map_win_addr) { 2812 if (!mapped_buffer->ctag_map_win_addr) {
2813 mutex_unlock(&vm->update_gmmu_lock); 2813 nvgpu_mutex_release(&vm->update_gmmu_lock);
2814 gk20a_err(d, 2814 gk20a_err(d,
2815 "%s: failed to map comptags for mapping 0x%llx", 2815 "%s: failed to map comptags for mapping 0x%llx",
2816 __func__, mapping_gva); 2816 __func__, mapping_gva);
@@ -2818,7 +2818,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
2818 } 2818 }
2819 } else if (fixed_mapping && *compbits_win_gva && 2819 } else if (fixed_mapping && *compbits_win_gva &&
2820 mapped_buffer->ctag_map_win_addr != *compbits_win_gva) { 2820 mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
2821 mutex_unlock(&vm->update_gmmu_lock); 2821 nvgpu_mutex_release(&vm->update_gmmu_lock);
2822 gk20a_err(d, 2822 gk20a_err(d,
2823 "%s: re-requesting comptags map into mismatching address. buffer offset 0x" 2823 "%s: re-requesting comptags map into mismatching address. buffer offset 0x"
2824 "%llx, existing comptag map at 0x%llx, requested remap 0x%llx", 2824 "%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
@@ -2830,7 +2830,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
2830 *mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0); 2830 *mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
2831 *compbits_win_gva = mapped_buffer->ctag_map_win_addr; 2831 *compbits_win_gva = mapped_buffer->ctag_map_win_addr;
2832 2832
2833 mutex_unlock(&vm->update_gmmu_lock); 2833 nvgpu_mutex_release(&vm->update_gmmu_lock);
2834 2834
2835 return 0; 2835 return 0;
2836} 2836}
@@ -2852,7 +2852,7 @@ static u64 __gk20a_gmmu_map(struct vm_gk20a *vm,
2852 struct gk20a *g = gk20a_from_vm(vm); 2852 struct gk20a *g = gk20a_from_vm(vm);
2853 u64 vaddr; 2853 u64 vaddr;
2854 2854
2855 mutex_lock(&vm->update_gmmu_lock); 2855 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
2856 vaddr = g->ops.mm.gmmu_map(vm, addr, 2856 vaddr = g->ops.mm.gmmu_map(vm, addr,
2857 *sgt, /* sg table */ 2857 *sgt, /* sg table */
2858 0, /* sg offset */ 2858 0, /* sg offset */
@@ -2866,7 +2866,7 @@ static u64 __gk20a_gmmu_map(struct vm_gk20a *vm,
2866 priv, /* priv */ 2866 priv, /* priv */
2867 NULL, /* mapping_batch handle */ 2867 NULL, /* mapping_batch handle */
2868 aperture); 2868 aperture);
2869 mutex_unlock(&vm->update_gmmu_lock); 2869 nvgpu_mutex_release(&vm->update_gmmu_lock);
2870 if (!vaddr) { 2870 if (!vaddr) {
2871 gk20a_err(dev_from_vm(vm), "failed to allocate va space"); 2871 gk20a_err(dev_from_vm(vm), "failed to allocate va space");
2872 return 0; 2872 return 0;
@@ -3128,10 +3128,10 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
3128 * are not done anyway */ 3128 * are not done anyway */
3129 WARN_ON(attr != 0 && attr != DMA_ATTR_NO_KERNEL_MAPPING); 3129 WARN_ON(attr != 0 && attr != DMA_ATTR_NO_KERNEL_MAPPING);
3130 3130
3131 mutex_lock(&g->mm.vidmem.clear_list_mutex); 3131 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
3132 before_pending = atomic64_read(&g->mm.vidmem.bytes_pending); 3132 before_pending = atomic64_read(&g->mm.vidmem.bytes_pending);
3133 addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size); 3133 addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size);
3134 mutex_unlock(&g->mm.vidmem.clear_list_mutex); 3134 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
3135 if (!addr) { 3135 if (!addr) {
3136 /* 3136 /*
3137 * If memory is known to be freed soon, let the user know that 3137 * If memory is known to be freed soon, let the user know that
@@ -3188,12 +3188,12 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
3188 bool was_empty; 3188 bool was_empty;
3189 3189
3190 if (mem->user_mem) { 3190 if (mem->user_mem) {
3191 mutex_lock(&g->mm.vidmem.clear_list_mutex); 3191 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
3192 was_empty = list_empty(&g->mm.vidmem.clear_list_head); 3192 was_empty = list_empty(&g->mm.vidmem.clear_list_head);
3193 list_add_tail(&mem->clear_list_entry, 3193 list_add_tail(&mem->clear_list_entry,
3194 &g->mm.vidmem.clear_list_head); 3194 &g->mm.vidmem.clear_list_head);
3195 atomic64_add(mem->size, &g->mm.vidmem.bytes_pending); 3195 atomic64_add(mem->size, &g->mm.vidmem.bytes_pending);
3196 mutex_unlock(&g->mm.vidmem.clear_list_mutex); 3196 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
3197 3197
3198 if (was_empty) { 3198 if (was_empty) {
3199 cancel_work_sync(&g->mm.vidmem.clear_mem_worker); 3199 cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
@@ -3258,12 +3258,12 @@ static struct mem_desc *get_pending_mem_desc(struct mm_gk20a *mm)
3258{ 3258{
3259 struct mem_desc *mem = NULL; 3259 struct mem_desc *mem = NULL;
3260 3260
3261 mutex_lock(&mm->vidmem.clear_list_mutex); 3261 nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
3262 mem = list_first_entry_or_null(&mm->vidmem.clear_list_head, 3262 mem = list_first_entry_or_null(&mm->vidmem.clear_list_head,
3263 struct mem_desc, clear_list_entry); 3263 struct mem_desc, clear_list_entry);
3264 if (mem) 3264 if (mem)
3265 list_del_init(&mem->clear_list_entry); 3265 list_del_init(&mem->clear_list_entry);
3266 mutex_unlock(&mm->vidmem.clear_list_mutex); 3266 nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
3267 3267
3268 return mem; 3268 return mem;
3269} 3269}
@@ -3409,12 +3409,12 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
3409 dma_addr_t addr = 0; 3409 dma_addr_t addr = 0;
3410 struct gk20a *g = gk20a_from_vm(vm); 3410 struct gk20a *g = gk20a_from_vm(vm);
3411 3411
3412 mutex_lock(&vm->update_gmmu_lock); 3412 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
3413 buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr); 3413 buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr);
3414 if (buffer) 3414 if (buffer)
3415 addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl, 3415 addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl,
3416 buffer->flags); 3416 buffer->flags);
3417 mutex_unlock(&vm->update_gmmu_lock); 3417 nvgpu_mutex_release(&vm->update_gmmu_lock);
3418 3418
3419 return addr; 3419 return addr;
3420} 3420}
@@ -3426,7 +3426,7 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
3426{ 3426{
3427 struct gk20a *g = gk20a_from_vm(vm); 3427 struct gk20a *g = gk20a_from_vm(vm);
3428 3428
3429 mutex_lock(&vm->update_gmmu_lock); 3429 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
3430 g->ops.mm.gmmu_unmap(vm, 3430 g->ops.mm.gmmu_unmap(vm,
3431 vaddr, 3431 vaddr,
3432 size, 3432 size,
@@ -3435,7 +3435,7 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
3435 rw_flag, 3435 rw_flag,
3436 false, 3436 false,
3437 NULL); 3437 NULL);
3438 mutex_unlock(&vm->update_gmmu_lock); 3438 nvgpu_mutex_release(&vm->update_gmmu_lock);
3439} 3439}
3440 3440
3441phys_addr_t gk20a_get_phys_from_iova(struct device *d, 3441phys_addr_t gk20a_get_phys_from_iova(struct device *d,
@@ -4053,16 +4053,16 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
4053 struct device *d = dev_from_vm(vm); 4053 struct device *d = dev_from_vm(vm);
4054 struct mapped_buffer_node *mapped_buffer; 4054 struct mapped_buffer_node *mapped_buffer;
4055 4055
4056 mutex_lock(&vm->update_gmmu_lock); 4056 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
4057 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); 4057 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
4058 if (!mapped_buffer) { 4058 if (!mapped_buffer) {
4059 mutex_unlock(&vm->update_gmmu_lock); 4059 nvgpu_mutex_release(&vm->update_gmmu_lock);
4060 gk20a_err(d, "invalid addr to unmap 0x%llx", offset); 4060 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
4061 return; 4061 return;
4062 } 4062 }
4063 4063
4064 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); 4064 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
4065 mutex_unlock(&vm->update_gmmu_lock); 4065 nvgpu_mutex_release(&vm->update_gmmu_lock);
4066} 4066}
4067 4067
4068static void gk20a_vm_free_entries(struct vm_gk20a *vm, 4068static void gk20a_vm_free_entries(struct vm_gk20a *vm,
@@ -4101,7 +4101,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
4101 } 4101 }
4102 } 4102 }
4103 4103
4104 mutex_lock(&vm->update_gmmu_lock); 4104 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
4105 4105
4106 /* TBD: add a flag here for the unmap code to recognize teardown 4106 /* TBD: add a flag here for the unmap code to recognize teardown
4107 * and short-circuit any otherwise expensive operations. */ 4107 * and short-circuit any otherwise expensive operations. */
@@ -4123,7 +4123,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
4123 4123
4124 gk20a_deinit_vm(vm); 4124 gk20a_deinit_vm(vm);
4125 4125
4126 mutex_unlock(&vm->update_gmmu_lock); 4126 nvgpu_mutex_release(&vm->update_gmmu_lock);
4127} 4127}
4128 4128
4129void gk20a_vm_remove_support(struct vm_gk20a *vm) 4129void gk20a_vm_remove_support(struct vm_gk20a *vm)
@@ -4547,7 +4547,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
4547 4547
4548 vm->mapped_buffers = RB_ROOT; 4548 vm->mapped_buffers = RB_ROOT;
4549 4549
4550 mutex_init(&vm->update_gmmu_lock); 4550 nvgpu_mutex_init(&vm->update_gmmu_lock);
4551 kref_init(&vm->ref); 4551 kref_init(&vm->ref);
4552 INIT_LIST_HEAD(&vm->reserved_va_list); 4552 INIT_LIST_HEAD(&vm->reserved_va_list);
4553 4553
@@ -4696,7 +4696,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
4696 INIT_LIST_HEAD(&va_node->va_buffers_list); 4696 INIT_LIST_HEAD(&va_node->va_buffers_list);
4697 INIT_LIST_HEAD(&va_node->reserved_va_list); 4697 INIT_LIST_HEAD(&va_node->reserved_va_list);
4698 4698
4699 mutex_lock(&vm->update_gmmu_lock); 4699 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
4700 4700
4701 /* mark that we need to use sparse mappings here */ 4701 /* mark that we need to use sparse mappings here */
4702 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) { 4702 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) {
@@ -4715,7 +4715,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
4715 NULL, 4715 NULL,
4716 APERTURE_INVALID); 4716 APERTURE_INVALID);
4717 if (!map_offset) { 4717 if (!map_offset) {
4718 mutex_unlock(&vm->update_gmmu_lock); 4718 nvgpu_mutex_release(&vm->update_gmmu_lock);
4719 nvgpu_free(vma, vaddr_start); 4719 nvgpu_free(vma, vaddr_start);
4720 kfree(va_node); 4720 kfree(va_node);
4721 goto clean_up; 4721 goto clean_up;
@@ -4725,7 +4725,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
4725 } 4725 }
4726 list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list); 4726 list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
4727 4727
4728 mutex_unlock(&vm->update_gmmu_lock); 4728 nvgpu_mutex_release(&vm->update_gmmu_lock);
4729 4729
4730 args->o_a.offset = vaddr_start; 4730 args->o_a.offset = vaddr_start;
4731 err = 0; 4731 err = 0;
@@ -4754,7 +4754,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
4754 vma = vm->vma[pgsz_idx]; 4754 vma = vm->vma[pgsz_idx];
4755 nvgpu_free(vma, args->offset); 4755 nvgpu_free(vma, args->offset);
4756 4756
4757 mutex_lock(&vm->update_gmmu_lock); 4757 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
4758 va_node = addr_to_reservation(vm, args->offset); 4758 va_node = addr_to_reservation(vm, args->offset);
4759 if (va_node) { 4759 if (va_node) {
4760 struct mapped_buffer_node *buffer, *n; 4760 struct mapped_buffer_node *buffer, *n;
@@ -4782,7 +4782,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
4782 NULL); 4782 NULL);
4783 kfree(va_node); 4783 kfree(va_node);
4784 } 4784 }
4785 mutex_unlock(&vm->update_gmmu_lock); 4785 nvgpu_mutex_release(&vm->update_gmmu_lock);
4786 err = 0; 4786 err = 0;
4787 4787
4788 return err; 4788 return err;
@@ -4819,7 +4819,7 @@ int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
4819 if (likely(priv)) 4819 if (likely(priv))
4820 return 0; 4820 return 0;
4821 4821
4822 mutex_lock(&priv_lock); 4822 nvgpu_mutex_acquire(&priv_lock);
4823 priv = dma_buf_get_drvdata(dmabuf, dev); 4823 priv = dma_buf_get_drvdata(dmabuf, dev);
4824 if (priv) 4824 if (priv)
4825 goto priv_exist_or_err; 4825 goto priv_exist_or_err;
@@ -4828,12 +4828,12 @@ int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
4828 priv = ERR_PTR(-ENOMEM); 4828 priv = ERR_PTR(-ENOMEM);
4829 goto priv_exist_or_err; 4829 goto priv_exist_or_err;
4830 } 4830 }
4831 mutex_init(&priv->lock); 4831 nvgpu_mutex_init(&priv->lock);
4832 INIT_LIST_HEAD(&priv->states); 4832 INIT_LIST_HEAD(&priv->states);
4833 priv->buffer_id = ++priv_count; 4833 priv->buffer_id = ++priv_count;
4834 dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); 4834 dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
4835priv_exist_or_err: 4835priv_exist_or_err:
4836 mutex_unlock(&priv_lock); 4836 nvgpu_mutex_release(&priv_lock);
4837 if (IS_ERR(priv)) 4837 if (IS_ERR(priv))
4838 return -ENOMEM; 4838 return -ENOMEM;
4839 4839
@@ -4858,7 +4858,7 @@ int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev,
4858 if (WARN_ON(!priv)) 4858 if (WARN_ON(!priv))
4859 return -ENOSYS; 4859 return -ENOSYS;
4860 4860
4861 mutex_lock(&priv->lock); 4861 nvgpu_mutex_acquire(&priv->lock);
4862 4862
4863 list_for_each_entry(s, &priv->states, list) 4863 list_for_each_entry(s, &priv->states, list)
4864 if (s->offset == offset) 4864 if (s->offset == offset)
@@ -4873,11 +4873,11 @@ int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev,
4873 4873
4874 s->offset = offset; 4874 s->offset = offset;
4875 INIT_LIST_HEAD(&s->list); 4875 INIT_LIST_HEAD(&s->list);
4876 mutex_init(&s->lock); 4876 nvgpu_mutex_init(&s->lock);
4877 list_add_tail(&s->list, &priv->states); 4877 list_add_tail(&s->list, &priv->states);
4878 4878
4879out: 4879out:
4880 mutex_unlock(&priv->lock); 4880 nvgpu_mutex_release(&priv->lock);
4881 if (!err) 4881 if (!err)
4882 *state = s; 4882 *state = s;
4883 return err; 4883 return err;
@@ -5152,7 +5152,7 @@ int gk20a_mm_fb_flush(struct gk20a *g)
5152 5152
5153 nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER); 5153 nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
5154 5154
5155 mutex_lock(&mm->l2_op_lock); 5155 nvgpu_mutex_acquire(&mm->l2_op_lock);
5156 5156
5157 /* Make sure all previous writes are committed to the L2. There's no 5157 /* Make sure all previous writes are committed to the L2. There's no
5158 guarantee that writes are to DRAM. This will be a sysmembar internal 5158 guarantee that writes are to DRAM. This will be a sysmembar internal
@@ -5184,7 +5184,7 @@ int gk20a_mm_fb_flush(struct gk20a *g)
5184 5184
5185 trace_gk20a_mm_fb_flush_done(dev_name(g->dev)); 5185 trace_gk20a_mm_fb_flush_done(dev_name(g->dev));
5186 5186
5187 mutex_unlock(&mm->l2_op_lock); 5187 nvgpu_mutex_release(&mm->l2_op_lock);
5188 5188
5189 pm_runtime_put_noidle(g->dev); 5189 pm_runtime_put_noidle(g->dev);
5190 5190
@@ -5231,9 +5231,9 @@ void gk20a_mm_l2_invalidate(struct gk20a *g)
5231 struct mm_gk20a *mm = &g->mm; 5231 struct mm_gk20a *mm = &g->mm;
5232 gk20a_busy_noresume(g->dev); 5232 gk20a_busy_noresume(g->dev);
5233 if (g->power_on) { 5233 if (g->power_on) {
5234 mutex_lock(&mm->l2_op_lock); 5234 nvgpu_mutex_acquire(&mm->l2_op_lock);
5235 gk20a_mm_l2_invalidate_locked(g); 5235 gk20a_mm_l2_invalidate_locked(g);
5236 mutex_unlock(&mm->l2_op_lock); 5236 nvgpu_mutex_release(&mm->l2_op_lock);
5237 } 5237 }
5238 pm_runtime_put_noidle(g->dev); 5238 pm_runtime_put_noidle(g->dev);
5239} 5239}
@@ -5252,7 +5252,7 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
5252 5252
5253 nvgpu_timeout_init(g, &timeout, 2000, NVGPU_TIMER_RETRY_TIMER); 5253 nvgpu_timeout_init(g, &timeout, 2000, NVGPU_TIMER_RETRY_TIMER);
5254 5254
5255 mutex_lock(&mm->l2_op_lock); 5255 nvgpu_mutex_acquire(&mm->l2_op_lock);
5256 5256
5257 trace_gk20a_mm_l2_flush(dev_name(g->dev)); 5257 trace_gk20a_mm_l2_flush(dev_name(g->dev));
5258 5258
@@ -5280,7 +5280,7 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
5280 if (invalidate) 5280 if (invalidate)
5281 gk20a_mm_l2_invalidate_locked(g); 5281 gk20a_mm_l2_invalidate_locked(g);
5282 5282
5283 mutex_unlock(&mm->l2_op_lock); 5283 nvgpu_mutex_release(&mm->l2_op_lock);
5284 5284
5285hw_was_off: 5285hw_was_off:
5286 pm_runtime_put_noidle(g->dev); 5286 pm_runtime_put_noidle(g->dev);
@@ -5300,7 +5300,7 @@ void gk20a_mm_cbc_clean(struct gk20a *g)
5300 5300
5301 nvgpu_timeout_init(g, &timeout, 200, NVGPU_TIMER_RETRY_TIMER); 5301 nvgpu_timeout_init(g, &timeout, 200, NVGPU_TIMER_RETRY_TIMER);
5302 5302
5303 mutex_lock(&mm->l2_op_lock); 5303 nvgpu_mutex_acquire(&mm->l2_op_lock);
5304 5304
5305 /* Flush all dirty lines from the CBC to L2 */ 5305 /* Flush all dirty lines from the CBC to L2 */
5306 gk20a_writel(g, flush_l2_clean_comptags_r(), 5306 gk20a_writel(g, flush_l2_clean_comptags_r(),
@@ -5320,7 +5320,7 @@ void gk20a_mm_cbc_clean(struct gk20a *g)
5320 } while (!nvgpu_timeout_expired_msg(&timeout, 5320 } while (!nvgpu_timeout_expired_msg(&timeout,
5321 "l2_clean_comptags too many retries")); 5321 "l2_clean_comptags too many retries"));
5322 5322
5323 mutex_unlock(&mm->l2_op_lock); 5323 nvgpu_mutex_release(&mm->l2_op_lock);
5324 5324
5325hw_was_off: 5325hw_was_off:
5326 pm_runtime_put_noidle(g->dev); 5326 pm_runtime_put_noidle(g->dev);
@@ -5334,19 +5334,19 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
5334 5334
5335 gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); 5335 gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
5336 5336
5337 mutex_lock(&vm->update_gmmu_lock); 5337 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
5338 5338
5339 mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers, 5339 mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
5340 gpu_va); 5340 gpu_va);
5341 if (!mapped_buffer) { 5341 if (!mapped_buffer) {
5342 mutex_unlock(&vm->update_gmmu_lock); 5342 nvgpu_mutex_release(&vm->update_gmmu_lock);
5343 return -EINVAL; 5343 return -EINVAL;
5344 } 5344 }
5345 5345
5346 *dmabuf = mapped_buffer->dmabuf; 5346 *dmabuf = mapped_buffer->dmabuf;
5347 *offset = gpu_va - mapped_buffer->addr; 5347 *offset = gpu_va - mapped_buffer->addr;
5348 5348
5349 mutex_unlock(&vm->update_gmmu_lock); 5349 nvgpu_mutex_release(&vm->update_gmmu_lock);
5350 5350
5351 return 0; 5351 return 0;
5352} 5352}
@@ -5373,7 +5373,7 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
5373 5373
5374 addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0) >> 12); 5374 addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0) >> 12);
5375 5375
5376 mutex_lock(&tlb_lock); 5376 nvgpu_mutex_acquire(&tlb_lock);
5377 5377
5378 trace_gk20a_mm_tlb_invalidate(dev_name(g->dev)); 5378 trace_gk20a_mm_tlb_invalidate(dev_name(g->dev));
5379 5379
@@ -5414,7 +5414,7 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
5414 trace_gk20a_mm_tlb_invalidate_done(dev_name(g->dev)); 5414 trace_gk20a_mm_tlb_invalidate_done(dev_name(g->dev));
5415 5415
5416out: 5416out:
5417 mutex_unlock(&tlb_lock); 5417 nvgpu_mutex_release(&tlb_lock);
5418} 5418}
5419 5419
5420int gk20a_mm_suspend(struct gk20a *g) 5420int gk20a_mm_suspend(struct gk20a *g)
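Among the mm_gk20a.c changes, gk20a_vidmem_buf_alloc() keeps its double-checked "clear vidmem once" logic; only the lock calls change. A sketch of that pattern under the nvgpu wrapper; struct vidmem_example and vidmem_example_ensure_cleared() are hypothetical, and the flag handling simply mirrors the cleared/first_clear_mutex pair visible above.

	#include <linux/types.h>
	#include <nvgpu/lock.h>

	struct vidmem_example {				/* hypothetical stand-in */
		struct nvgpu_mutex first_clear_mutex;
		volatile bool cleared;
	};

	static int vidmem_example_ensure_cleared(struct vidmem_example *v)
	{
		int err = 0;

		if (!v->cleared) {
			nvgpu_mutex_acquire(&v->first_clear_mutex);
			if (!v->cleared) {
				/* one-time clear elided; mark done on success */
				v->cleared = true;
			}
			nvgpu_mutex_release(&v->first_clear_mutex);
		}
		return err;
	}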
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 294dc628..5f29c9e7 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -131,7 +131,7 @@ struct gk20a_buffer_state {
131 struct list_head list; 131 struct list_head list;
132 132
133 /* The valid compbits and the fence must be changed atomically. */ 133 /* The valid compbits and the fence must be changed atomically. */
134 struct mutex lock; 134 struct nvgpu_mutex lock;
135 135
136 /* Offset of the surface within the dma-buf whose state is 136 /* Offset of the surface within the dma-buf whose state is
137 * described by this struct (one dma-buf can contain multiple 137 * described by this struct (one dma-buf can contain multiple
@@ -272,7 +272,7 @@ struct vm_gk20a {
272 272
273 struct kref ref; 273 struct kref ref;
274 274
275 struct mutex update_gmmu_lock; 275 struct nvgpu_mutex update_gmmu_lock;
276 276
277 struct gk20a_mm_entry pdb; 277 struct gk20a_mm_entry pdb;
278 278
@@ -360,7 +360,7 @@ struct mm_gk20a {
360 struct vm_gk20a vm; 360 struct vm_gk20a vm;
361 } ce; 361 } ce;
362 362
363 struct mutex l2_op_lock; 363 struct nvgpu_mutex l2_op_lock;
364#ifdef CONFIG_ARCH_TEGRA_18x_SOC 364#ifdef CONFIG_ARCH_TEGRA_18x_SOC
365 struct mem_desc bar2_desc; 365 struct mem_desc bar2_desc;
366#endif 366#endif
@@ -395,7 +395,7 @@ struct mm_gk20a {
395 struct mem_desc sysmem_flush; 395 struct mem_desc sysmem_flush;
396 396
397 u32 pramin_window; 397 u32 pramin_window;
398 spinlock_t pramin_window_lock; 398 struct nvgpu_spinlock pramin_window_lock;
399#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) 399#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
400 u32 force_pramin; /* via debugfs */ 400 u32 force_pramin; /* via debugfs */
401#else 401#else
@@ -413,10 +413,10 @@ struct mm_gk20a {
413 413
414 u32 ce_ctx_id; 414 u32 ce_ctx_id;
415 volatile bool cleared; 415 volatile bool cleared;
416 struct mutex first_clear_mutex; 416 struct nvgpu_mutex first_clear_mutex;
417 417
418 struct list_head clear_list_head; 418 struct list_head clear_list_head;
419 struct mutex clear_list_mutex; 419 struct nvgpu_mutex clear_list_mutex;
420 420
421 struct work_struct clear_mem_worker; 421 struct work_struct clear_mem_worker;
422 atomic64_t bytes_pending; 422 atomic64_t bytes_pending;
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index fa0909ee..c841c8e6 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -20,6 +20,8 @@
20#include <linux/dma-attrs.h> 20#include <linux/dma-attrs.h>
21#include <linux/version.h> 21#include <linux/version.h>
22 22
23#include <nvgpu/lock.h>
24
23#define GK20A_CLKS_MAX 4 25#define GK20A_CLKS_MAX 4
24 26
25struct gk20a; 27struct gk20a;
@@ -184,7 +186,7 @@ struct gk20a_platform {
184 186
185 /* Called to turn on the device */ 187 /* Called to turn on the device */
186 int (*unrailgate)(struct device *dev); 188 int (*unrailgate)(struct device *dev);
187 struct mutex railgate_lock; 189 struct nvgpu_mutex railgate_lock;
188 190
189 /* Called to check state of device */ 191 /* Called to check state of device */
190 bool (*is_railgated)(struct device *dev); 192 bool (*is_railgated)(struct device *dev);
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index 5ba9d25b..225b98e4 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -357,7 +357,7 @@ static void gm20b_tegra_postscale(struct device *dev, unsigned long freq)
357 emc_rate = tegra_bwmgr_get_max_emc_rate(); 357 emc_rate = tegra_bwmgr_get_max_emc_rate();
358 358
359 emc_params->freq_last_set = emc_rate; 359 emc_params->freq_last_set = emc_rate;
360 mutex_lock(&platform->railgate_lock); 360 nvgpu_mutex_acquire(&platform->railgate_lock);
361 if (platform->is_railgated && !platform->is_railgated(dev)) 361 if (platform->is_railgated && !platform->is_railgated(dev))
362 goto done; 362 goto done;
363 363
@@ -365,7 +365,7 @@ static void gm20b_tegra_postscale(struct device *dev, unsigned long freq)
365 TEGRA_BWMGR_SET_EMC_FLOOR); 365 TEGRA_BWMGR_SET_EMC_FLOOR);
366 366
367done: 367done:
368 mutex_unlock(&platform->railgate_lock); 368 nvgpu_mutex_release(&platform->railgate_lock);
369} 369}
370 370
371#endif 371#endif
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 6227d523..4ea9b911 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -1414,11 +1414,11 @@ int gk20a_init_pmu(struct pmu_gk20a *pmu)
1414 struct gk20a *g = gk20a_from_pmu(pmu); 1414 struct gk20a *g = gk20a_from_pmu(pmu);
1415 struct pmu_v *pv = &g->ops.pmu_ver; 1415 struct pmu_v *pv = &g->ops.pmu_ver;
1416 1416
1417 mutex_init(&pmu->elpg_mutex); 1417 nvgpu_mutex_init(&pmu->elpg_mutex);
1418 mutex_init(&pmu->pg_mutex); 1418 nvgpu_mutex_init(&pmu->pg_mutex);
1419 mutex_init(&pmu->isr_mutex); 1419 nvgpu_mutex_init(&pmu->isr_mutex);
1420 mutex_init(&pmu->pmu_copy_lock); 1420 nvgpu_mutex_init(&pmu->pmu_copy_lock);
1421 mutex_init(&pmu->pmu_seq_lock); 1421 nvgpu_mutex_init(&pmu->pmu_seq_lock);
1422 1422
1423 pmu->remove_support = gk20a_remove_pmu_support; 1423 pmu->remove_support = gk20a_remove_pmu_support;
1424 1424
@@ -2189,7 +2189,7 @@ void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
2189 return; 2189 return;
2190 } 2190 }
2191 2191
2192 mutex_lock(&pmu->pmu_copy_lock); 2192 nvgpu_mutex_acquire(&pmu->pmu_copy_lock);
2193 2193
2194 words = size >> 2; 2194 words = size >> 2;
2195 bytes = size & 0x3; 2195 bytes = size & 0x3;
@@ -2211,7 +2211,7 @@ void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
2211 dst[(words << 2) + i] = ((u8 *)&data)[i]; 2211 dst[(words << 2) + i] = ((u8 *)&data)[i];
2212 } 2212 }
2213 } 2213 }
2214 mutex_unlock(&pmu->pmu_copy_lock); 2214 nvgpu_mutex_release(&pmu->pmu_copy_lock);
2215 return; 2215 return;
2216} 2216}
2217 2217
@@ -2235,7 +2235,7 @@ void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
2235 return; 2235 return;
2236 } 2236 }
2237 2237
2238 mutex_lock(&pmu->pmu_copy_lock); 2238 nvgpu_mutex_acquire(&pmu->pmu_copy_lock);
2239 2239
2240 words = size >> 2; 2240 words = size >> 2;
2241 bytes = size & 0x3; 2241 bytes = size & 0x3;
@@ -2265,7 +2265,7 @@ void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
2265 "copy failed. bytes written %d, expected %d", 2265 "copy failed. bytes written %d, expected %d",
2266 data - dst, size); 2266 data - dst, size);
2267 } 2267 }
2268 mutex_unlock(&pmu->pmu_copy_lock); 2268 nvgpu_mutex_release(&pmu->pmu_copy_lock);
2269 return; 2269 return;
2270} 2270}
2271 2271
@@ -2571,17 +2571,17 @@ static int pmu_seq_acquire(struct pmu_gk20a *pmu,
2571 struct pmu_sequence *seq; 2571 struct pmu_sequence *seq;
2572 u32 index; 2572 u32 index;
2573 2573
2574 mutex_lock(&pmu->pmu_seq_lock); 2574 nvgpu_mutex_acquire(&pmu->pmu_seq_lock);
2575 index = find_first_zero_bit(pmu->pmu_seq_tbl, 2575 index = find_first_zero_bit(pmu->pmu_seq_tbl,
2576 sizeof(pmu->pmu_seq_tbl)); 2576 sizeof(pmu->pmu_seq_tbl));
2577 if (index >= sizeof(pmu->pmu_seq_tbl)) { 2577 if (index >= sizeof(pmu->pmu_seq_tbl)) {
2578 gk20a_err(dev_from_gk20a(g), 2578 gk20a_err(dev_from_gk20a(g),
2579 "no free sequence available"); 2579 "no free sequence available");
2580 mutex_unlock(&pmu->pmu_seq_lock); 2580 nvgpu_mutex_release(&pmu->pmu_seq_lock);
2581 return -EAGAIN; 2581 return -EAGAIN;
2582 } 2582 }
2583 set_bit(index, pmu->pmu_seq_tbl); 2583 set_bit(index, pmu->pmu_seq_tbl);
2584 mutex_unlock(&pmu->pmu_seq_lock); 2584 nvgpu_mutex_release(&pmu->pmu_seq_lock);
2585 2585
2586 seq = &pmu->seq[index]; 2586 seq = &pmu->seq[index];
2587 seq->state = PMU_SEQ_STATE_PENDING; 2587 seq->state = PMU_SEQ_STATE_PENDING;
@@ -2616,7 +2616,7 @@ static int pmu_queue_init(struct pmu_gk20a *pmu,
2616 queue->id = id; 2616 queue->id = id;
2617 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init); 2617 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
2618 queue->mutex_id = id; 2618 queue->mutex_id = id;
2619 mutex_init(&queue->mutex); 2619 nvgpu_mutex_init(&queue->mutex);
2620 2620
2621 gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x", 2621 gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
2622 id, queue->index, queue->offset, queue->size); 2622 id, queue->index, queue->offset, queue->size);
@@ -2831,7 +2831,7 @@ static int pmu_queue_lock(struct pmu_gk20a *pmu,
2831 return 0; 2831 return 0;
2832 2832
2833 if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { 2833 if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
2834 mutex_lock(&queue->mutex); 2834 nvgpu_mutex_acquire(&queue->mutex);
2835 return 0; 2835 return 0;
2836 } 2836 }
2837 2837
@@ -2848,7 +2848,7 @@ static int pmu_queue_unlock(struct pmu_gk20a *pmu,
2848 return 0; 2848 return 0;
2849 2849
2850 if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { 2850 if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
2851 mutex_unlock(&queue->mutex); 2851 nvgpu_mutex_release(&queue->mutex);
2852 return 0; 2852 return 0;
2853 } 2853 }
2854 2854
@@ -3245,10 +3245,10 @@ static int gk20a_init_pmu_setup_hw1(struct gk20a *g)
3245 3245
3246 gk20a_dbg_fn(""); 3246 gk20a_dbg_fn("");
3247 3247
3248 mutex_lock(&pmu->isr_mutex); 3248 nvgpu_mutex_acquire(&pmu->isr_mutex);
3249 pmu_reset(pmu); 3249 pmu_reset(pmu);
3250 pmu->isr_enabled = true; 3250 pmu->isr_enabled = true;
3251 mutex_unlock(&pmu->isr_mutex); 3251 nvgpu_mutex_release(&pmu->isr_mutex);
3252 3252
3253 /* setup apertures - virtual */ 3253 /* setup apertures - virtual */
3254 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), 3254 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
@@ -4530,9 +4530,9 @@ void gk20a_pmu_isr(struct gk20a *g)
4530 4530
4531 gk20a_dbg_fn(""); 4531 gk20a_dbg_fn("");
4532 4532
4533 mutex_lock(&pmu->isr_mutex); 4533 nvgpu_mutex_acquire(&pmu->isr_mutex);
4534 if (!pmu->isr_enabled) { 4534 if (!pmu->isr_enabled) {
4535 mutex_unlock(&pmu->isr_mutex); 4535 nvgpu_mutex_release(&pmu->isr_mutex);
4536 return; 4536 return;
4537 } 4537 }
4538 4538
@@ -4546,7 +4546,7 @@ void gk20a_pmu_isr(struct gk20a *g)
4546 intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask; 4546 intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
4547 if (!intr || pmu->pmu_state == PMU_STATE_OFF) { 4547 if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
4548 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr); 4548 gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
4549 mutex_unlock(&pmu->isr_mutex); 4549 nvgpu_mutex_release(&pmu->isr_mutex);
4550 return; 4550 return;
4551 } 4551 }
4552 4552
@@ -4583,7 +4583,7 @@ void gk20a_pmu_isr(struct gk20a *g)
4583 pwr_falcon_irqsset_swgen0_set_f()); 4583 pwr_falcon_irqsset_swgen0_set_f());
4584 } 4584 }
4585 4585
4586 mutex_unlock(&pmu->isr_mutex); 4586 nvgpu_mutex_release(&pmu->isr_mutex);
4587} 4587}
4588 4588
4589static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, 4589static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
@@ -4987,7 +4987,7 @@ int gk20a_pmu_enable_elpg(struct gk20a *g)
4987 if (!support_gk20a_pmu(g->dev)) 4987 if (!support_gk20a_pmu(g->dev))
4988 return ret; 4988 return ret;
4989 4989
4990 mutex_lock(&pmu->elpg_mutex); 4990 nvgpu_mutex_acquire(&pmu->elpg_mutex);
4991 4991
4992 pmu->elpg_refcnt++; 4992 pmu->elpg_refcnt++;
4993 if (pmu->elpg_refcnt <= 0) 4993 if (pmu->elpg_refcnt <= 0)
@@ -5026,7 +5026,7 @@ int gk20a_pmu_enable_elpg(struct gk20a *g)
5026 } 5026 }
5027 5027
5028exit_unlock: 5028exit_unlock:
5029 mutex_unlock(&pmu->elpg_mutex); 5029 nvgpu_mutex_release(&pmu->elpg_mutex);
5030 gk20a_dbg_fn("done"); 5030 gk20a_dbg_fn("done");
5031 return ret; 5031 return ret;
5032} 5032}
@@ -5049,7 +5049,7 @@ int gk20a_pmu_disable_elpg(struct gk20a *g)
5049 if (!support_gk20a_pmu(g->dev)) 5049 if (!support_gk20a_pmu(g->dev))
5050 return ret; 5050 return ret;
5051 5051
5052 mutex_lock(&pmu->elpg_mutex); 5052 nvgpu_mutex_acquire(&pmu->elpg_mutex);
5053 5053
5054 pmu->elpg_refcnt--; 5054 pmu->elpg_refcnt--;
5055 if (pmu->elpg_refcnt > 0) { 5055 if (pmu->elpg_refcnt > 0) {
@@ -5138,7 +5138,7 @@ int gk20a_pmu_disable_elpg(struct gk20a *g)
5138 5138
5139exit_reschedule: 5139exit_reschedule:
5140exit_unlock: 5140exit_unlock:
5141 mutex_unlock(&pmu->elpg_mutex); 5141 nvgpu_mutex_release(&pmu->elpg_mutex);
5142 gk20a_dbg_fn("done"); 5142 gk20a_dbg_fn("done");
5143 return ret; 5143 return ret;
5144} 5144}
@@ -5182,9 +5182,9 @@ int gk20a_pmu_destroy(struct gk20a *g)
5182 g->pg_ungating_time_us += (u64)pg_stat_data.ungating_time; 5182 g->pg_ungating_time_us += (u64)pg_stat_data.ungating_time;
5183 g->pg_gating_cnt += pg_stat_data.gating_cnt; 5183 g->pg_gating_cnt += pg_stat_data.gating_cnt;
5184 5184
5185 mutex_lock(&pmu->isr_mutex); 5185 nvgpu_mutex_acquire(&pmu->isr_mutex);
5186 pmu->isr_enabled = false; 5186 pmu->isr_enabled = false;
5187 mutex_unlock(&pmu->isr_mutex); 5187 nvgpu_mutex_release(&pmu->isr_mutex);
5188 5188
5189 pmu->pmu_state = PMU_STATE_OFF; 5189 pmu->pmu_state = PMU_STATE_OFF;
5190 pmu->pmu_ready = false; 5190 pmu->pmu_ready = false;
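
Across pmu_gk20a.c the conversion is purely mechanical: mutex_init/mutex_lock/mutex_unlock become nvgpu_mutex_init/nvgpu_mutex_acquire/nvgpu_mutex_release with the locking scope left unchanged, including the release on every early-return path. A minimal sketch of the resulting call pattern, assuming a hypothetical foo_ctx with an nvgpu_mutex member (the context structure and functions are illustrative; only the nvgpu_mutex_* calls come from the patch itself):

#include <nvgpu/lock.h>

struct foo_ctx {
        struct nvgpu_mutex lock;
        bool enabled;
};

static void foo_ctx_setup(struct foo_ctx *ctx)
{
        /* one-time initialization, mirroring gk20a_init_pmu() above */
        nvgpu_mutex_init(&ctx->lock);
}

static int foo_ctx_enable(struct foo_ctx *ctx)
{
        nvgpu_mutex_acquire(&ctx->lock);
        if (ctx->enabled) {
                /* every early return still drops the lock, as in gk20a_pmu_isr() */
                nvgpu_mutex_release(&ctx->lock);
                return 0;
        }
        ctx->enabled = true;
        nvgpu_mutex_release(&ctx->lock);
        return 0;
}
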
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index e4513457..c1583eab 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -136,7 +136,7 @@ struct pmu_queue {
136 u32 mutex_id; 136 u32 mutex_id;
137 u32 mutex_lock; 137 u32 mutex_lock;
138 /* used by sw, for LPQ/HPQ queue */ 138 /* used by sw, for LPQ/HPQ queue */
139 struct mutex mutex; 139 struct nvgpu_mutex mutex;
140 140
141 /* current write position */ 141 /* current write position */
142 u32 position; 142 u32 position;
@@ -334,8 +334,8 @@ struct pmu_gk20a {
334 struct pmu_mutex *mutex; 334 struct pmu_mutex *mutex;
335 u32 mutex_cnt; 335 u32 mutex_cnt;
336 336
337 struct mutex pmu_copy_lock; 337 struct nvgpu_mutex pmu_copy_lock;
338 struct mutex pmu_seq_lock; 338 struct nvgpu_mutex pmu_seq_lock;
339 339
340 struct nvgpu_allocator dmem; 340 struct nvgpu_allocator dmem;
341 341
@@ -355,8 +355,8 @@ struct pmu_gk20a {
355 355
356#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */ 356#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */
357 struct work_struct pg_init; 357 struct work_struct pg_init;
358 struct mutex pg_mutex; /* protect pg-RPPG/MSCG enable/disable */ 358 struct nvgpu_mutex pg_mutex; /* protect pg-RPPG/MSCG enable/disable */
359 struct mutex elpg_mutex; /* protect elpg enable/disable */ 359 struct nvgpu_mutex elpg_mutex; /* protect elpg enable/disable */
360 int elpg_refcnt; /* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */ 360 int elpg_refcnt; /* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
361 361
362 union { 362 union {
@@ -375,7 +375,7 @@ struct pmu_gk20a {
375 u32 load_shadow; 375 u32 load_shadow;
376 u32 load_avg; 376 u32 load_avg;
377 377
378 struct mutex isr_mutex; 378 struct nvgpu_mutex isr_mutex;
379 bool isr_enabled; 379 bool isr_enabled;
380 380
381 bool zbc_ready; 381 bool zbc_ready;
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
index 20cd1232..6fdc2774 100644
--- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
@@ -46,29 +46,29 @@ ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
46 return -EINVAL; 46 return -EINVAL;
47 size = sizeof(event); 47 size = sizeof(event);
48 48
49 mutex_lock(&sched->status_lock); 49 nvgpu_mutex_acquire(&sched->status_lock);
50 while (!sched->status) { 50 while (!sched->status) {
51 mutex_unlock(&sched->status_lock); 51 nvgpu_mutex_release(&sched->status_lock);
52 if (filp->f_flags & O_NONBLOCK) 52 if (filp->f_flags & O_NONBLOCK)
53 return -EAGAIN; 53 return -EAGAIN;
54 err = wait_event_interruptible(sched->readout_wq, 54 err = wait_event_interruptible(sched->readout_wq,
55 sched->status); 55 sched->status);
56 if (err) 56 if (err)
57 return err; 57 return err;
58 mutex_lock(&sched->status_lock); 58 nvgpu_mutex_acquire(&sched->status_lock);
59 } 59 }
60 60
61 event.reserved = 0; 61 event.reserved = 0;
62 event.status = sched->status; 62 event.status = sched->status;
63 63
64 if (copy_to_user(buf, &event, size)) { 64 if (copy_to_user(buf, &event, size)) {
65 mutex_unlock(&sched->status_lock); 65 nvgpu_mutex_release(&sched->status_lock);
66 return -EFAULT; 66 return -EFAULT;
67 } 67 }
68 68
69 sched->status = 0; 69 sched->status = 0;
70 70
71 mutex_unlock(&sched->status_lock); 71 nvgpu_mutex_release(&sched->status_lock);
72 72
73 return size; 73 return size;
74} 74}
@@ -80,11 +80,11 @@ unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait)
80 80
81 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, ""); 81 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
82 82
83 mutex_lock(&sched->status_lock); 83 nvgpu_mutex_acquire(&sched->status_lock);
84 poll_wait(filp, &sched->readout_wq, wait); 84 poll_wait(filp, &sched->readout_wq, wait);
85 if (sched->status) 85 if (sched->status)
86 mask |= POLLIN | POLLRDNORM; 86 mask |= POLLIN | POLLRDNORM;
87 mutex_unlock(&sched->status_lock); 87 nvgpu_mutex_release(&sched->status_lock);
88 88
89 return mask; 89 return mask;
90} 90}
@@ -100,13 +100,13 @@ static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched,
100 return -ENOSPC; 100 return -ENOSPC;
101 } 101 }
102 102
103 mutex_lock(&sched->status_lock); 103 nvgpu_mutex_acquire(&sched->status_lock);
104 if (copy_to_user((void __user *)(uintptr_t)arg->buffer, 104 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
105 sched->active_tsg_bitmap, sched->bitmap_size)) { 105 sched->active_tsg_bitmap, sched->bitmap_size)) {
106 mutex_unlock(&sched->status_lock); 106 nvgpu_mutex_release(&sched->status_lock);
107 return -EFAULT; 107 return -EFAULT;
108 } 108 }
109 mutex_unlock(&sched->status_lock); 109 nvgpu_mutex_release(&sched->status_lock);
110 110
111 return 0; 111 return 0;
112} 112}
@@ -122,15 +122,15 @@ static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched,
122 return -ENOSPC; 122 return -ENOSPC;
123 } 123 }
124 124
125 mutex_lock(&sched->status_lock); 125 nvgpu_mutex_acquire(&sched->status_lock);
126 if (copy_to_user((void __user *)(uintptr_t)arg->buffer, 126 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
127 sched->recent_tsg_bitmap, sched->bitmap_size)) { 127 sched->recent_tsg_bitmap, sched->bitmap_size)) {
128 mutex_unlock(&sched->status_lock); 128 nvgpu_mutex_release(&sched->status_lock);
129 return -EFAULT; 129 return -EFAULT;
130 } 130 }
131 131
132 memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); 132 memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
133 mutex_unlock(&sched->status_lock); 133 nvgpu_mutex_release(&sched->status_lock);
134 134
135 return 0; 135 return 0;
136} 136}
@@ -158,7 +158,7 @@ static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched,
158 if (!bitmap) 158 if (!bitmap)
159 return -ENOMEM; 159 return -ENOMEM;
160 160
161 mutex_lock(&sched->status_lock); 161 nvgpu_mutex_acquire(&sched->status_lock);
162 for (tsgid = 0; tsgid < f->num_channels; tsgid++) { 162 for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
163 if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { 163 if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) {
164 tsg = &f->tsg[tsgid]; 164 tsg = &f->tsg[tsgid];
@@ -166,7 +166,7 @@ static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched,
166 NVGPU_SCHED_SET(tsgid, bitmap); 166 NVGPU_SCHED_SET(tsgid, bitmap);
167 } 167 }
168 } 168 }
169 mutex_unlock(&sched->status_lock); 169 nvgpu_mutex_release(&sched->status_lock);
170 170
171 if (copy_to_user((void __user *)(uintptr_t)arg->buffer, 171 if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
172 bitmap, sched->bitmap_size)) 172 bitmap, sched->bitmap_size))
@@ -283,9 +283,9 @@ static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched)
283{ 283{
284 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, ""); 284 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
285 285
286 mutex_lock(&sched->control_lock); 286 nvgpu_mutex_acquire(&sched->control_lock);
287 sched->control_locked = true; 287 sched->control_locked = true;
288 mutex_unlock(&sched->control_lock); 288 nvgpu_mutex_release(&sched->control_lock);
289 return 0; 289 return 0;
290} 290}
291 291
@@ -293,9 +293,9 @@ static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched)
293{ 293{
294 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, ""); 294 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "");
295 295
296 mutex_lock(&sched->control_lock); 296 nvgpu_mutex_acquire(&sched->control_lock);
297 sched->control_locked = false; 297 sched->control_locked = false;
298 mutex_unlock(&sched->control_lock); 298 nvgpu_mutex_release(&sched->control_lock);
299 return 0; 299 return 0;
300} 300}
301 301
@@ -325,12 +325,12 @@ static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
325 if (!kref_get_unless_zero(&tsg->refcount)) 325 if (!kref_get_unless_zero(&tsg->refcount))
326 return -ENXIO; 326 return -ENXIO;
327 327
328 mutex_lock(&sched->status_lock); 328 nvgpu_mutex_acquire(&sched->status_lock);
329 if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { 329 if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
330 gk20a_warn(dev_from_gk20a(g), 330 gk20a_warn(dev_from_gk20a(g),
331 "tsgid=%d already referenced", tsgid); 331 "tsgid=%d already referenced", tsgid);
332 /* unlock status_lock as gk20a_tsg_release locks it */ 332 /* unlock status_lock as gk20a_tsg_release locks it */
333 mutex_unlock(&sched->status_lock); 333 nvgpu_mutex_release(&sched->status_lock);
334 kref_put(&tsg->refcount, gk20a_tsg_release); 334 kref_put(&tsg->refcount, gk20a_tsg_release);
335 return -ENXIO; 335 return -ENXIO;
336 } 336 }
@@ -339,7 +339,7 @@ static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
339 * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close 339 * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close
340 */ 340 */
341 NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); 341 NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap);
342 mutex_unlock(&sched->status_lock); 342 nvgpu_mutex_release(&sched->status_lock);
343 343
344 return 0; 344 return 0;
345} 345}
@@ -357,15 +357,15 @@ static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched,
357 if (tsgid >= f->num_channels) 357 if (tsgid >= f->num_channels)
358 return -EINVAL; 358 return -EINVAL;
359 359
360 mutex_lock(&sched->status_lock); 360 nvgpu_mutex_acquire(&sched->status_lock);
361 if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { 361 if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
362 mutex_unlock(&sched->status_lock); 362 nvgpu_mutex_release(&sched->status_lock);
363 gk20a_warn(dev_from_gk20a(g), 363 gk20a_warn(dev_from_gk20a(g),
364 "tsgid=%d not previously referenced", tsgid); 364 "tsgid=%d not previously referenced", tsgid);
365 return -ENXIO; 365 return -ENXIO;
366 } 366 }
367 NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); 367 NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap);
368 mutex_unlock(&sched->status_lock); 368 nvgpu_mutex_release(&sched->status_lock);
369 369
370 tsg = &f->tsg[tsgid]; 370 tsg = &f->tsg[tsgid];
371 kref_put(&tsg->refcount, gk20a_tsg_release); 371 kref_put(&tsg->refcount, gk20a_tsg_release);
@@ -390,7 +390,7 @@ int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
390 gk20a_idle(g->dev); 390 gk20a_idle(g->dev);
391 } 391 }
392 392
393 if (!mutex_trylock(&sched->busy_lock)) 393 if (!nvgpu_mutex_tryacquire(&sched->busy_lock))
394 return -EBUSY; 394 return -EBUSY;
395 395
396 memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, 396 memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
@@ -506,11 +506,11 @@ int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
506 } 506 }
507 507
508 /* unlock control */ 508 /* unlock control */
509 mutex_lock(&sched->control_lock); 509 nvgpu_mutex_acquire(&sched->control_lock);
510 sched->control_locked = false; 510 sched->control_locked = false;
511 mutex_unlock(&sched->control_lock); 511 nvgpu_mutex_release(&sched->control_lock);
512 512
513 mutex_unlock(&sched->busy_lock); 513 nvgpu_mutex_release(&sched->busy_lock);
514 return 0; 514 return 0;
515} 515}
516 516
@@ -530,16 +530,16 @@ static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
530 if (err) 530 if (err)
531 return err; 531 return err;
532 532
533 if (mutex_trylock(&sched->busy_lock)) { 533 if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
534 sched_busy = false; 534 sched_busy = false;
535 mutex_unlock(&sched->busy_lock); 535 nvgpu_mutex_release(&sched->busy_lock);
536 } 536 }
537 537
538 seq_printf(s, "control_locked=%d\n", sched->control_locked); 538 seq_printf(s, "control_locked=%d\n", sched->control_locked);
539 seq_printf(s, "busy=%d\n", sched_busy); 539 seq_printf(s, "busy=%d\n", sched_busy);
540 seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); 540 seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
541 541
542 mutex_lock(&sched->status_lock); 542 nvgpu_mutex_acquire(&sched->status_lock);
543 543
544 seq_puts(s, "active_tsg_bitmap\n"); 544 seq_puts(s, "active_tsg_bitmap\n");
545 for (i = 0; i < n; i++) 545 for (i = 0; i < n; i++)
@@ -549,7 +549,7 @@ static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
549 for (i = 0; i < n; i++) 549 for (i = 0; i < n; i++)
550 seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); 550 seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
551 551
552 mutex_unlock(&sched->status_lock); 552 nvgpu_mutex_release(&sched->status_lock);
553 553
554 gk20a_idle(g->dev); 554 gk20a_idle(g->dev);
555 555
@@ -594,11 +594,11 @@ void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
594 gk20a_idle(g->dev); 594 gk20a_idle(g->dev);
595 } 595 }
596 596
597 mutex_lock(&sched->status_lock); 597 nvgpu_mutex_acquire(&sched->status_lock);
598 NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap); 598 NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap);
599 NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap); 599 NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap);
600 sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN; 600 sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN;
601 mutex_unlock(&sched->status_lock); 601 nvgpu_mutex_release(&sched->status_lock);
602 wake_up_interruptible(&sched->readout_wq); 602 wake_up_interruptible(&sched->readout_wq);
603} 603}
604 604
@@ -608,7 +608,7 @@ void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
608 608
609 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); 609 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
610 610
611 mutex_lock(&sched->status_lock); 611 nvgpu_mutex_acquire(&sched->status_lock);
612 NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap); 612 NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap);
613 613
614 /* clear recent_tsg_bitmap as well: if app manager did not 614 /* clear recent_tsg_bitmap as well: if app manager did not
@@ -621,7 +621,7 @@ void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
621 /* do not set event_pending, we only want to notify app manager 621 /* do not set event_pending, we only want to notify app manager
622 * when TSGs are added, so that it can apply sched params 622 * when TSGs are added, so that it can apply sched params
623 */ 623 */
624 mutex_unlock(&sched->status_lock); 624 nvgpu_mutex_release(&sched->status_lock);
625} 625}
626 626
627int gk20a_sched_ctrl_init(struct gk20a *g) 627int gk20a_sched_ctrl_init(struct gk20a *g)
@@ -652,9 +652,9 @@ int gk20a_sched_ctrl_init(struct gk20a *g)
652 goto free_recent; 652 goto free_recent;
653 653
654 init_waitqueue_head(&sched->readout_wq); 654 init_waitqueue_head(&sched->readout_wq);
655 mutex_init(&sched->status_lock); 655 nvgpu_mutex_init(&sched->status_lock);
656 mutex_init(&sched->control_lock); 656 nvgpu_mutex_init(&sched->control_lock);
657 mutex_init(&sched->busy_lock); 657 nvgpu_mutex_init(&sched->busy_lock);
658 658
659 sched->sw_ready = true; 659 sched->sw_ready = true;
660 660
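
sched_gk20a.c also exercises the non-blocking variant: mutex_trylock becomes nvgpu_mutex_tryacquire, which, as gk20a_sched_dev_open() above shows, returns nonzero on success. A minimal sketch of that usage, carrying over the hypothetical foo_ctx from the previous sketch:

static int foo_try_claim(struct foo_ctx *ctx)
{
        /* non-blocking attempt; bail out if another path holds the lock */
        if (!nvgpu_mutex_tryacquire(&ctx->lock))
                return -EBUSY;

        /* ... short critical section ... */

        nvgpu_mutex_release(&ctx->lock);
        return 0;
}
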
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
index 0ae13783..1f983678 100644
--- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -21,11 +21,11 @@ struct tsg_gk20a;
21struct gk20a_sched_ctrl { 21struct gk20a_sched_ctrl {
22 struct gk20a *g; 22 struct gk20a *g;
23 23
24 struct mutex control_lock; 24 struct nvgpu_mutex control_lock;
25 bool control_locked; 25 bool control_locked;
26 bool sw_ready; 26 bool sw_ready;
27 struct mutex status_lock; 27 struct nvgpu_mutex status_lock;
28 struct mutex busy_lock; 28 struct nvgpu_mutex busy_lock;
29 29
30 u64 status; 30 u64 status;
31 31
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index edfe3deb..f57871d5 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -20,8 +20,7 @@
20#include <linux/hrtimer.h> 20#include <linux/hrtimer.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/spinlock.h> 23#include <nvgpu/lock.h>
24
25#include <uapi/linux/nvgpu.h> 24#include <uapi/linux/nvgpu.h>
26 25
27#include <nvgpu/semaphore.h> 26#include <nvgpu/semaphore.h>
@@ -55,7 +54,7 @@ struct gk20a_sync_pt {
55 * than a mutex - there should be very little contention on this 54 * than a mutex - there should be very little contention on this
56 * lock. 55 * lock.
57 */ 56 */
58 spinlock_t lock; 57 struct nvgpu_spinlock lock;
59}; 58};
60 59
61struct gk20a_sync_pt_inst { 60struct gk20a_sync_pt_inst {
@@ -242,7 +241,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
242 } 241 }
243 } 242 }
244 243
245 spin_lock_init(&shared->lock); 244 nvgpu_spinlock_init(&shared->lock);
246 245
247 nvgpu_semaphore_get(sema); 246 nvgpu_semaphore_get(sema);
248 247
@@ -304,7 +303,7 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
304#endif 303#endif
305 bool signaled = true; 304 bool signaled = true;
306 305
307 spin_lock(&pt->lock); 306 nvgpu_spinlock_acquire(&pt->lock);
308 if (!pt->sema) 307 if (!pt->sema)
309 goto done; 308 goto done;
310 309
@@ -345,7 +344,7 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
345 pt->sema = NULL; 344 pt->sema = NULL;
346 } 345 }
347done: 346done:
348 spin_unlock(&pt->lock); 347 nvgpu_spinlock_release(&pt->lock);
349 348
350 return signaled; 349 return signaled;
351} 350}
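
For the sync-point code the same renaming applies to spinlocks: spin_lock_init/spin_lock/spin_unlock become nvgpu_spinlock_init/nvgpu_spinlock_acquire/nvgpu_spinlock_release, again declared in <nvgpu/lock.h>. A minimal sketch, with a hypothetical counter structure standing in for gk20a_sync_pt and the kernel u32 type assumed to be available:

struct foo_counter {
        struct nvgpu_spinlock lock;
        u32 value;
};

static void foo_counter_init(struct foo_counter *c)
{
        nvgpu_spinlock_init(&c->lock);
        c->value = 0;
}

static u32 foo_counter_bump(struct foo_counter *c)
{
        u32 v;

        /* short, non-sleeping critical section, as in gk20a_sync_pt_has_signaled() */
        nvgpu_spinlock_acquire(&c->lock);
        v = ++c->value;
        nvgpu_spinlock_release(&c->lock);
        return v;
}
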
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 96d6873d..aadf5463 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -169,7 +169,7 @@ int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid)
169 init_rwsem(&tsg->ch_list_lock); 169 init_rwsem(&tsg->ch_list_lock);
170 170
171 INIT_LIST_HEAD(&tsg->event_id_list); 171 INIT_LIST_HEAD(&tsg->event_id_list);
172 mutex_init(&tsg->event_id_list_lock); 172 nvgpu_mutex_init(&tsg->event_id_list_lock);
173 173
174 return 0; 174 return 0;
175} 175}
@@ -204,7 +204,7 @@ static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg,
204 struct gk20a_event_id_data *local_event_id_data; 204 struct gk20a_event_id_data *local_event_id_data;
205 bool event_found = false; 205 bool event_found = false;
206 206
207 mutex_lock(&tsg->event_id_list_lock); 207 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
208 list_for_each_entry(local_event_id_data, &tsg->event_id_list, 208 list_for_each_entry(local_event_id_data, &tsg->event_id_list,
209 event_id_node) { 209 event_id_node) {
210 if (local_event_id_data->event_id == event_id) { 210 if (local_event_id_data->event_id == event_id) {
@@ -212,7 +212,7 @@ static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg,
212 break; 212 break;
213 } 213 }
214 } 214 }
215 mutex_unlock(&tsg->event_id_list_lock); 215 nvgpu_mutex_release(&tsg->event_id_list_lock);
216 216
217 if (event_found) { 217 if (event_found) {
218 *event_id_data = local_event_id_data; 218 *event_id_data = local_event_id_data;
@@ -233,7 +233,7 @@ void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
233 if (err) 233 if (err)
234 return; 234 return;
235 235
236 mutex_lock(&event_id_data->lock); 236 nvgpu_mutex_acquire(&event_id_data->lock);
237 237
238 gk20a_dbg_info( 238 gk20a_dbg_info(
239 "posting event for event_id=%d on tsg=%d\n", 239 "posting event for event_id=%d on tsg=%d\n",
@@ -242,7 +242,7 @@ void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
242 242
243 wake_up_interruptible_all(&event_id_data->event_id_wq); 243 wake_up_interruptible_all(&event_id_data->event_id_wq);
244 244
245 mutex_unlock(&event_id_data->lock); 245 nvgpu_mutex_release(&event_id_data->lock);
246} 246}
247 247
248static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, 248static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
@@ -287,12 +287,12 @@ static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
287 event_id_data->event_id = event_id; 287 event_id_data->event_id = event_id;
288 288
289 init_waitqueue_head(&event_id_data->event_id_wq); 289 init_waitqueue_head(&event_id_data->event_id_wq);
290 mutex_init(&event_id_data->lock); 290 nvgpu_mutex_init(&event_id_data->lock);
291 INIT_LIST_HEAD(&event_id_data->event_id_node); 291 INIT_LIST_HEAD(&event_id_data->event_id_node);
292 292
293 mutex_lock(&tsg->event_id_list_lock); 293 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
294 list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); 294 list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list);
295 mutex_unlock(&tsg->event_id_list_lock); 295 nvgpu_mutex_release(&tsg->event_id_list_lock);
296 296
297 fd_install(local_fd, file); 297 fd_install(local_fd, file);
298 file->private_data = event_id_data; 298 file->private_data = event_id_data;
@@ -370,9 +370,9 @@ int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
370 370
371static void release_used_tsg(struct fifo_gk20a *f, struct tsg_gk20a *tsg) 371static void release_used_tsg(struct fifo_gk20a *f, struct tsg_gk20a *tsg)
372{ 372{
373 mutex_lock(&f->tsg_inuse_mutex); 373 nvgpu_mutex_acquire(&f->tsg_inuse_mutex);
374 f->tsg[tsg->tsgid].in_use = false; 374 f->tsg[tsg->tsgid].in_use = false;
375 mutex_unlock(&f->tsg_inuse_mutex); 375 nvgpu_mutex_release(&f->tsg_inuse_mutex);
376} 376}
377 377
378static struct tsg_gk20a *acquire_unused_tsg(struct fifo_gk20a *f) 378static struct tsg_gk20a *acquire_unused_tsg(struct fifo_gk20a *f)
@@ -380,7 +380,7 @@ static struct tsg_gk20a *acquire_unused_tsg(struct fifo_gk20a *f)
380 struct tsg_gk20a *tsg = NULL; 380 struct tsg_gk20a *tsg = NULL;
381 unsigned int tsgid; 381 unsigned int tsgid;
382 382
383 mutex_lock(&f->tsg_inuse_mutex); 383 nvgpu_mutex_acquire(&f->tsg_inuse_mutex);
384 for (tsgid = 0; tsgid < f->num_channels; tsgid++) { 384 for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
385 if (!f->tsg[tsgid].in_use) { 385 if (!f->tsg[tsgid].in_use) {
386 f->tsg[tsgid].in_use = true; 386 f->tsg[tsgid].in_use = true;
@@ -388,7 +388,7 @@ static struct tsg_gk20a *acquire_unused_tsg(struct fifo_gk20a *f)
388 break; 388 break;
389 } 389 }
390 } 390 }
391 mutex_unlock(&f->tsg_inuse_mutex); 391 nvgpu_mutex_release(&f->tsg_inuse_mutex);
392 392
393 return tsg; 393 return tsg;
394} 394}
@@ -482,13 +482,13 @@ void gk20a_tsg_release(struct kref *ref)
482 gk20a_sched_ctrl_tsg_removed(g, tsg); 482 gk20a_sched_ctrl_tsg_removed(g, tsg);
483 483
484 /* unhook all events created on this TSG */ 484 /* unhook all events created on this TSG */
485 mutex_lock(&tsg->event_id_list_lock); 485 nvgpu_mutex_acquire(&tsg->event_id_list_lock);
486 list_for_each_entry_safe(event_id_data, event_id_data_temp, 486 list_for_each_entry_safe(event_id_data, event_id_data_temp,
487 &tsg->event_id_list, 487 &tsg->event_id_list,
488 event_id_node) { 488 event_id_node) {
489 list_del_init(&event_id_data->event_id_node); 489 list_del_init(&event_id_data->event_id_node);
490 } 490 }
491 mutex_unlock(&tsg->event_id_list_lock); 491 nvgpu_mutex_release(&tsg->event_id_list_lock);
492 492
493 release_used_tsg(&g->fifo, tsg); 493 release_used_tsg(&g->fifo, tsg);
494 494
@@ -517,7 +517,7 @@ static int gk20a_tsg_ioctl_set_priority(struct gk20a *g,
517 struct gk20a_sched_ctrl *sched = &g->sched_ctrl; 517 struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
518 int err; 518 int err;
519 519
520 mutex_lock(&sched->control_lock); 520 nvgpu_mutex_acquire(&sched->control_lock);
521 if (sched->control_locked) { 521 if (sched->control_locked) {
522 err = -EPERM; 522 err = -EPERM;
523 goto done; 523 goto done;
@@ -533,7 +533,7 @@ static int gk20a_tsg_ioctl_set_priority(struct gk20a *g,
533 533
534 gk20a_idle(g->dev); 534 gk20a_idle(g->dev);
535done: 535done:
536 mutex_unlock(&sched->control_lock); 536 nvgpu_mutex_release(&sched->control_lock);
537 return err; 537 return err;
538} 538}
539 539
@@ -545,7 +545,7 @@ static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
545 545
546 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); 546 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
547 547
548 mutex_lock(&sched->control_lock); 548 nvgpu_mutex_acquire(&sched->control_lock);
549 if (sched->control_locked) { 549 if (sched->control_locked) {
550 err = -EPERM; 550 err = -EPERM;
551 goto done; 551 goto done;
@@ -560,7 +560,7 @@ static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
560 560
561 gk20a_idle(g->dev); 561 gk20a_idle(g->dev);
562done: 562done:
563 mutex_unlock(&sched->control_lock); 563 nvgpu_mutex_release(&sched->control_lock);
564 return err; 564 return err;
565} 565}
566 566
@@ -572,7 +572,7 @@ static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
572 572
573 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); 573 gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
574 574
575 mutex_lock(&sched->control_lock); 575 nvgpu_mutex_acquire(&sched->control_lock);
576 if (sched->control_locked) { 576 if (sched->control_locked) {
577 err = -EPERM; 577 err = -EPERM;
578 goto done; 578 goto done;
@@ -585,7 +585,7 @@ static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
585 err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); 585 err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us);
586 gk20a_idle(g->dev); 586 gk20a_idle(g->dev);
587done: 587done:
588 mutex_unlock(&sched->control_lock); 588 nvgpu_mutex_release(&sched->control_lock);
589 return err; 589 return err;
590} 590}
591 591
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index a6642682..f95ae008 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -16,6 +16,8 @@
16#ifndef __TSG_GK20A_H_ 16#ifndef __TSG_GK20A_H_
17#define __TSG_GK20A_H_ 17#define __TSG_GK20A_H_
18 18
19#include <nvgpu/lock.h>
20
19#define NVGPU_INVALID_TSG_ID (-1) 21#define NVGPU_INVALID_TSG_ID (-1)
20 22
21struct channel_gk20a; 23struct channel_gk20a;
@@ -58,7 +60,7 @@ struct tsg_gk20a {
58 u32 interleave_level; 60 u32 interleave_level;
59 61
60 struct list_head event_id_list; 62 struct list_head event_id_list;
61 struct mutex event_id_list_lock; 63 struct nvgpu_mutex event_id_list_lock;
62 64
63 u32 runlist_id; 65 u32 runlist_id;
64 pid_t tgid; 66 pid_t tgid;
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 40a28136..c1cefc29 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -77,10 +77,10 @@ static get_ucode_details pmu_acr_supp_ucode_list[] = {
77static void start_gm20b_pmu(struct gk20a *g) 77static void start_gm20b_pmu(struct gk20a *g)
78{ 78{
79 /*disable irqs for hs falcon booting as we will poll for halt*/ 79 /*disable irqs for hs falcon booting as we will poll for halt*/
80 mutex_lock(&g->pmu.isr_mutex); 80 nvgpu_mutex_acquire(&g->pmu.isr_mutex);
81 pmu_enable_irq(&g->pmu, true); 81 pmu_enable_irq(&g->pmu, true);
82 g->pmu.isr_enabled = true; 82 g->pmu.isr_enabled = true;
83 mutex_unlock(&g->pmu.isr_mutex); 83 nvgpu_mutex_release(&g->pmu.isr_mutex);
84 gk20a_writel(g, pwr_falcon_cpuctl_alias_r(), 84 gk20a_writel(g, pwr_falcon_cpuctl_alias_r(),
85 pwr_falcon_cpuctl_startcpu_f(1)); 85 pwr_falcon_cpuctl_startcpu_f(1));
86} 86}
@@ -1282,10 +1282,10 @@ int gm20b_init_nspmu_setup_hw1(struct gk20a *g)
1282 1282
1283 gk20a_dbg_fn(""); 1283 gk20a_dbg_fn("");
1284 1284
1285 mutex_lock(&pmu->isr_mutex); 1285 nvgpu_mutex_acquire(&pmu->isr_mutex);
1286 pmu_reset(pmu); 1286 pmu_reset(pmu);
1287 pmu->isr_enabled = true; 1287 pmu->isr_enabled = true;
1288 mutex_unlock(&pmu->isr_mutex); 1288 nvgpu_mutex_release(&pmu->isr_mutex);
1289 1289
1290 /* setup apertures - virtual */ 1290 /* setup apertures - virtual */
1291 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), 1291 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
@@ -1318,10 +1318,10 @@ static int gm20b_init_pmu_setup_hw1(struct gk20a *g,
1318 1318
1319 gk20a_dbg_fn(""); 1319 gk20a_dbg_fn("");
1320 1320
1321 mutex_lock(&pmu->isr_mutex); 1321 nvgpu_mutex_acquire(&pmu->isr_mutex);
1322 g->ops.pmu.reset(g); 1322 g->ops.pmu.reset(g);
1323 pmu->isr_enabled = true; 1323 pmu->isr_enabled = true;
1324 mutex_unlock(&pmu->isr_mutex); 1324 nvgpu_mutex_release(&pmu->isr_mutex);
1325 1325
1326 /* setup apertures - virtual */ 1326 /* setup apertures - virtual */
1327 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), 1327 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
@@ -1353,10 +1353,10 @@ static int gm20b_init_pmu_setup_hw1(struct gk20a *g,
1353 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), 1353 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
1354 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); 1354 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
1355 /*disable irqs for hs falcon booting as we will poll for halt*/ 1355 /*disable irqs for hs falcon booting as we will poll for halt*/
1356 mutex_lock(&pmu->isr_mutex); 1356 nvgpu_mutex_acquire(&pmu->isr_mutex);
1357 pmu_enable_irq(pmu, false); 1357 pmu_enable_irq(pmu, false);
1358 pmu->isr_enabled = false; 1358 pmu->isr_enabled = false;
1359 mutex_unlock(&pmu->isr_mutex); 1359 nvgpu_mutex_release(&pmu->isr_mutex);
1360 /*Clearing mailbox register used to reflect capabilities*/ 1360 /*Clearing mailbox register used to reflect capabilities*/
1361 gk20a_writel(g, pwr_falcon_mailbox1_r(), 0); 1361 gk20a_writel(g, pwr_falcon_mailbox1_r(), 0);
1362 err = bl_bootstrap(pmu, desc, bl_sz); 1362 err = bl_bootstrap(pmu, desc, bl_sz);
diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.c b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c
index 8db4944e..fc352151 100644
--- a/drivers/gpu/nvgpu/gm20b/clk_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c
@@ -1191,7 +1191,7 @@ static int gm20b_init_clk_setup_sw(struct gk20a *g)
1191 } 1191 }
1192#endif 1192#endif
1193 1193
1194 mutex_init(&clk->clk_mutex); 1194 nvgpu_mutex_init(&clk->clk_mutex);
1195 1195
1196 clk->sw_ready = true; 1196 clk->sw_ready = true;
1197 1197
@@ -1212,10 +1212,10 @@ static int gm20b_clk_prepare(struct clk_hw *hw)
1212 struct clk_gk20a *clk = to_clk_gk20a(hw); 1212 struct clk_gk20a *clk = to_clk_gk20a(hw);
1213 int ret = 0; 1213 int ret = 0;
1214 1214
1215 mutex_lock(&clk->clk_mutex); 1215 nvgpu_mutex_acquire(&clk->clk_mutex);
1216 if (!clk->gpc_pll.enabled && clk->clk_hw_on) 1216 if (!clk->gpc_pll.enabled && clk->clk_hw_on)
1217 ret = set_pll_freq(clk->g, 1); 1217 ret = set_pll_freq(clk->g, 1);
1218 mutex_unlock(&clk->clk_mutex); 1218 nvgpu_mutex_release(&clk->clk_mutex);
1219 return ret; 1219 return ret;
1220} 1220}
1221 1221
@@ -1223,10 +1223,10 @@ static void gm20b_clk_unprepare(struct clk_hw *hw)
1223{ 1223{
1224 struct clk_gk20a *clk = to_clk_gk20a(hw); 1224 struct clk_gk20a *clk = to_clk_gk20a(hw);
1225 1225
1226 mutex_lock(&clk->clk_mutex); 1226 nvgpu_mutex_acquire(&clk->clk_mutex);
1227 if (clk->gpc_pll.enabled && clk->clk_hw_on) 1227 if (clk->gpc_pll.enabled && clk->clk_hw_on)
1228 clk_disable_gpcpll(clk->g, 1); 1228 clk_disable_gpcpll(clk->g, 1);
1229 mutex_unlock(&clk->clk_mutex); 1229 nvgpu_mutex_release(&clk->clk_mutex);
1230} 1230}
1231 1231
1232static int gm20b_clk_is_prepared(struct clk_hw *hw) 1232static int gm20b_clk_is_prepared(struct clk_hw *hw)
@@ -1250,12 +1250,12 @@ static int gm20b_gpcclk_set_rate(struct clk_hw *hw, unsigned long rate,
1250 u32 old_freq; 1250 u32 old_freq;
1251 int ret = -ENODATA; 1251 int ret = -ENODATA;
1252 1252
1253 mutex_lock(&clk->clk_mutex); 1253 nvgpu_mutex_acquire(&clk->clk_mutex);
1254 old_freq = clk->gpc_pll.freq; 1254 old_freq = clk->gpc_pll.freq;
1255 ret = set_pll_target(clk->g, rate_gpu_to_gpc2clk(rate), old_freq); 1255 ret = set_pll_target(clk->g, rate_gpu_to_gpc2clk(rate), old_freq);
1256 if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on) 1256 if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on)
1257 ret = set_pll_freq(clk->g, 1); 1257 ret = set_pll_freq(clk->g, 1);
1258 mutex_unlock(&clk->clk_mutex); 1258 nvgpu_mutex_release(&clk->clk_mutex);
1259 1259
1260 return ret; 1260 return ret;
1261} 1261}
@@ -1272,7 +1272,7 @@ static long gm20b_round_rate(struct clk_hw *hw, unsigned long rate,
1272 if (rate > maxrate) 1272 if (rate > maxrate)
1273 rate = maxrate; 1273 rate = maxrate;
1274 1274
1275 mutex_lock(&clk->clk_mutex); 1275 nvgpu_mutex_acquire(&clk->clk_mutex);
1276 freq = rate_gpu_to_gpc2clk(rate); 1276 freq = rate_gpu_to_gpc2clk(rate);
1277 if (freq > gpc_pll_params.max_freq) 1277 if (freq > gpc_pll_params.max_freq)
1278 freq = gpc_pll_params.max_freq; 1278 freq = gpc_pll_params.max_freq;
@@ -1281,7 +1281,7 @@ static long gm20b_round_rate(struct clk_hw *hw, unsigned long rate,
1281 1281
1282 tmp_pll = clk->gpc_pll; 1282 tmp_pll = clk->gpc_pll;
1283 clk_config_pll(clk, &tmp_pll, &gpc_pll_params, &freq, true); 1283 clk_config_pll(clk, &tmp_pll, &gpc_pll_params, &freq, true);
1284 mutex_unlock(&clk->clk_mutex); 1284 nvgpu_mutex_release(&clk->clk_mutex);
1285 1285
1286 return rate_gpc2clk_to_gpu(tmp_pll.freq); 1286 return rate_gpc2clk_to_gpu(tmp_pll.freq);
1287} 1287}
@@ -1445,14 +1445,14 @@ static int gm20b_clk_export_set_rate(void *data, unsigned long *rate)
1445 struct clk_gk20a *clk = &g->clk; 1445 struct clk_gk20a *clk = &g->clk;
1446 1446
1447 if (rate) { 1447 if (rate) {
1448 mutex_lock(&clk->clk_mutex); 1448 nvgpu_mutex_acquire(&clk->clk_mutex);
1449 old_freq = clk->gpc_pll.freq; 1449 old_freq = clk->gpc_pll.freq;
1450 ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq); 1450 ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq);
1451 if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on) 1451 if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on)
1452 ret = set_pll_freq(g, 1); 1452 ret = set_pll_freq(g, 1);
1453 if (!ret) 1453 if (!ret)
1454 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); 1454 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
1455 mutex_unlock(&clk->clk_mutex); 1455 nvgpu_mutex_release(&clk->clk_mutex);
1456 } 1456 }
1457 return ret; 1457 return ret;
1458} 1458}
@@ -1463,10 +1463,10 @@ static int gm20b_clk_export_enable(void *data)
1463 struct gk20a *g = data; 1463 struct gk20a *g = data;
1464 struct clk_gk20a *clk = &g->clk; 1464 struct clk_gk20a *clk = &g->clk;
1465 1465
1466 mutex_lock(&clk->clk_mutex); 1466 nvgpu_mutex_acquire(&clk->clk_mutex);
1467 if (!clk->gpc_pll.enabled && clk->clk_hw_on) 1467 if (!clk->gpc_pll.enabled && clk->clk_hw_on)
1468 ret = set_pll_freq(g, 1); 1468 ret = set_pll_freq(g, 1);
1469 mutex_unlock(&clk->clk_mutex); 1469 nvgpu_mutex_release(&clk->clk_mutex);
1470 return ret; 1470 return ret;
1471} 1471}
1472 1472
@@ -1475,10 +1475,10 @@ static void gm20b_clk_export_disable(void *data)
1475 struct gk20a *g = data; 1475 struct gk20a *g = data;
1476 struct clk_gk20a *clk = &g->clk; 1476 struct clk_gk20a *clk = &g->clk;
1477 1477
1478 mutex_lock(&clk->clk_mutex); 1478 nvgpu_mutex_acquire(&clk->clk_mutex);
1479 if (clk->gpc_pll.enabled && clk->clk_hw_on) 1479 if (clk->gpc_pll.enabled && clk->clk_hw_on)
1480 clk_disable_gpcpll(g, 1); 1480 clk_disable_gpcpll(g, 1);
1481 mutex_unlock(&clk->clk_mutex); 1481 nvgpu_mutex_release(&clk->clk_mutex);
1482} 1482}
1483 1483
1484static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state) 1484static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state)
@@ -1486,12 +1486,12 @@ static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state)
1486 struct gk20a *g = data; 1486 struct gk20a *g = data;
1487 struct clk_gk20a *clk = &g->clk; 1487 struct clk_gk20a *clk = &g->clk;
1488 1488
1489 mutex_lock(&clk->clk_mutex); 1489 nvgpu_mutex_acquire(&clk->clk_mutex);
1490 if (state) 1490 if (state)
1491 *state = clk->gpc_pll.enabled; 1491 *state = clk->gpc_pll.enabled;
1492 if (rate) 1492 if (rate)
1493 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); 1493 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
1494 mutex_unlock(&clk->clk_mutex); 1494 nvgpu_mutex_release(&clk->clk_mutex);
1495} 1495}
1496 1496
1497static struct tegra_clk_export_ops gm20b_clk_export_ops = { 1497static struct tegra_clk_export_ops gm20b_clk_export_ops = {
@@ -1539,11 +1539,11 @@ static int gm20b_init_clk_support(struct gk20a *g)
1539 return err; 1539 return err;
1540#endif 1540#endif
1541 1541
1542 mutex_lock(&clk->clk_mutex); 1542 nvgpu_mutex_acquire(&clk->clk_mutex);
1543 clk->clk_hw_on = true; 1543 clk->clk_hw_on = true;
1544 1544
1545 err = gm20b_init_clk_setup_hw(g); 1545 err = gm20b_init_clk_setup_hw(g);
1546 mutex_unlock(&clk->clk_mutex); 1546 nvgpu_mutex_release(&clk->clk_mutex);
1547 if (err) 1547 if (err)
1548 return err; 1548 return err;
1549 1549
@@ -1559,10 +1559,10 @@ static int gm20b_init_clk_support(struct gk20a *g)
1559 return err; 1559 return err;
1560 1560
1561 /* The prev call may not enable PLL if gbus is unbalanced - force it */ 1561 /* The prev call may not enable PLL if gbus is unbalanced - force it */
1562 mutex_lock(&clk->clk_mutex); 1562 nvgpu_mutex_acquire(&clk->clk_mutex);
1563 if (!clk->gpc_pll.enabled) 1563 if (!clk->gpc_pll.enabled)
1564 err = set_pll_freq(g, 1); 1564 err = set_pll_freq(g, 1);
1565 mutex_unlock(&clk->clk_mutex); 1565 nvgpu_mutex_release(&clk->clk_mutex);
1566 if (err) 1566 if (err)
1567 return err; 1567 return err;
1568 1568
@@ -1582,11 +1582,11 @@ static int gm20b_suspend_clk_support(struct gk20a *g)
1582 clk_disable_unprepare(g->clk.tegra_clk); 1582 clk_disable_unprepare(g->clk.tegra_clk);
1583 1583
1584 /* The prev call may not disable PLL if gbus is unbalanced - force it */ 1584 /* The prev call may not disable PLL if gbus is unbalanced - force it */
1585 mutex_lock(&g->clk.clk_mutex); 1585 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1586 if (g->clk.gpc_pll.enabled) 1586 if (g->clk.gpc_pll.enabled)
1587 ret = clk_disable_gpcpll(g, 1); 1587 ret = clk_disable_gpcpll(g, 1);
1588 g->clk.clk_hw_on = false; 1588 g->clk.clk_hw_on = false;
1589 mutex_unlock(&g->clk.clk_mutex); 1589 nvgpu_mutex_release(&g->clk.clk_mutex);
1590 return ret; 1590 return ret;
1591} 1591}
1592 1592
@@ -1616,11 +1616,11 @@ static int pll_reg_show(struct seq_file *s, void *data)
1616 struct gk20a *g = s->private; 1616 struct gk20a *g = s->private;
1617 u32 reg, m, n, pl, f; 1617 u32 reg, m, n, pl, f;
1618 1618
1619 mutex_lock(&g->clk.clk_mutex); 1619 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1620 if (!g->clk.clk_hw_on) { 1620 if (!g->clk.clk_hw_on) {
1621 seq_printf(s, "%s powered down - no access to registers\n", 1621 seq_printf(s, "%s powered down - no access to registers\n",
1622 dev_name(dev_from_gk20a(g))); 1622 dev_name(dev_from_gk20a(g)));
1623 mutex_unlock(&g->clk.clk_mutex); 1623 nvgpu_mutex_release(&g->clk.clk_mutex);
1624 return 0; 1624 return 0;
1625 } 1625 }
1626 1626
@@ -1642,7 +1642,7 @@ static int pll_reg_show(struct seq_file *s, void *data)
1642 f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div(pl)); 1642 f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div(pl));
1643 seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); 1643 seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
1644 seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); 1644 seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
1645 mutex_unlock(&g->clk.clk_mutex); 1645 nvgpu_mutex_release(&g->clk.clk_mutex);
1646 return 0; 1646 return 0;
1647} 1647}
1648 1648
@@ -1663,11 +1663,11 @@ static int pll_reg_raw_show(struct seq_file *s, void *data)
1663 struct gk20a *g = s->private; 1663 struct gk20a *g = s->private;
1664 u32 reg; 1664 u32 reg;
1665 1665
1666 mutex_lock(&g->clk.clk_mutex); 1666 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1667 if (!g->clk.clk_hw_on) { 1667 if (!g->clk.clk_hw_on) {
1668 seq_printf(s, "%s powered down - no access to registers\n", 1668 seq_printf(s, "%s powered down - no access to registers\n",
1669 dev_name(dev_from_gk20a(g))); 1669 dev_name(dev_from_gk20a(g)));
1670 mutex_unlock(&g->clk.clk_mutex); 1670 nvgpu_mutex_release(&g->clk.clk_mutex);
1671 return 0; 1671 return 0;
1672 } 1672 }
1673 1673
@@ -1685,7 +1685,7 @@ static int pll_reg_raw_show(struct seq_file *s, void *data)
1685 reg = trim_sys_bypassctrl_r(); 1685 reg = trim_sys_bypassctrl_r();
1686 seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); 1686 seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
1687 1687
1688 mutex_unlock(&g->clk.clk_mutex); 1688 nvgpu_mutex_release(&g->clk.clk_mutex);
1689 return 0; 1689 return 0;
1690} 1690}
1691 1691
@@ -1722,13 +1722,13 @@ static ssize_t pll_reg_raw_write(struct file *file,
1722 (reg != trim_sys_bypassctrl_r())) 1722 (reg != trim_sys_bypassctrl_r()))
1723 return -EPERM; 1723 return -EPERM;
1724 1724
1725 mutex_lock(&g->clk.clk_mutex); 1725 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1726 if (!g->clk.clk_hw_on) { 1726 if (!g->clk.clk_hw_on) {
1727 mutex_unlock(&g->clk.clk_mutex); 1727 nvgpu_mutex_release(&g->clk.clk_mutex);
1728 return -EBUSY; 1728 return -EBUSY;
1729 } 1729 }
1730 gk20a_writel(g, reg, val); 1730 gk20a_writel(g, reg, val);
1731 mutex_unlock(&g->clk.clk_mutex); 1731 nvgpu_mutex_release(&g->clk.clk_mutex);
1732 return count; 1732 return count;
1733} 1733}
1734 1734
@@ -1755,7 +1755,7 @@ static int monitor_get(void *data, u64 *val)
1755 if (err) 1755 if (err)
1756 return err; 1756 return err;
1757 1757
1758 mutex_lock(&g->clk.clk_mutex); 1758 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1759 1759
1760 /* Disable clock slowdown during measurements */ 1760 /* Disable clock slowdown during measurements */
1761 clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0)); 1761 clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0));
@@ -1787,7 +1787,7 @@ static int monitor_get(void *data, u64 *val)
1787 1787
1788 /* Restore clock slowdown */ 1788 /* Restore clock slowdown */
1789 gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save); 1789 gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save);
1790 mutex_unlock(&g->clk.clk_mutex); 1790 nvgpu_mutex_release(&g->clk.clk_mutex);
1791 1791
1792 gk20a_idle(g->dev); 1792 gk20a_idle(g->dev);
1793 1793
@@ -1811,14 +1811,14 @@ static int voltage_get(void *data, u64 *val)
1811 if (err) 1811 if (err)
1812 return err; 1812 return err;
1813 1813
1814 mutex_lock(&g->clk.clk_mutex); 1814 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1815 1815
1816 det_out = gk20a_readl(g, trim_sys_gpcpll_cfg3_r()); 1816 det_out = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
1817 det_out = trim_sys_gpcpll_cfg3_dfs_testout_v(det_out); 1817 det_out = trim_sys_gpcpll_cfg3_dfs_testout_v(det_out);
1818 *val = div64_u64((u64)det_out * gpc_pll_params.uvdet_slope + 1818 *val = div64_u64((u64)det_out * gpc_pll_params.uvdet_slope +
1819 gpc_pll_params.uvdet_offs, 1000ULL); 1819 gpc_pll_params.uvdet_offs, 1000ULL);
1820 1820
1821 mutex_unlock(&g->clk.clk_mutex); 1821 nvgpu_mutex_release(&g->clk.clk_mutex);
1822 1822
1823 gk20a_idle(g->dev); 1823 gk20a_idle(g->dev);
1824 return 0; 1824 return 0;
diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.h b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h
index 7ea84826..5746165e 100644
--- a/drivers/gpu/nvgpu/gm20b/clk_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * GM20B Graphics 2 * GM20B Graphics
3 * 3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -19,7 +19,7 @@
19#ifndef _NVHOST_CLK_GM20B_H_ 19#ifndef _NVHOST_CLK_GM20B_H_
20#define _NVHOST_CLK_GM20B_H_ 20#define _NVHOST_CLK_GM20B_H_
21 21
22#include <linux/mutex.h> 22#include <nvgpu/lock.h>
23 23
24void gm20b_init_clk_ops(struct gpu_ops *gops); 24void gm20b_init_clk_ops(struct gpu_ops *gops);
25 25
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 0375d71f..9cf644fd 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1247,7 +1247,7 @@ static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
1247 GPU_LIT_TPC_IN_GPC_STRIDE); 1247 GPU_LIT_TPC_IN_GPC_STRIDE);
1248 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; 1248 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1249 1249
1250 mutex_lock(&g->dbg_sessions_lock); 1250 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1251 1251
1252 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, 1252 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
1253 gr_gpc0_tpc0_sm_cfg_r() + offset)); 1253 gr_gpc0_tpc0_sm_cfg_r() + offset));
@@ -1263,7 +1263,7 @@ static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
1263 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, 1263 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
1264 gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset); 1264 gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset);
1265 1265
1266 mutex_unlock(&g->dbg_sessions_lock); 1266 nvgpu_mutex_release(&g->dbg_sessions_lock);
1267 1267
1268 return 0; 1268 return 0;
1269} 1269}
@@ -1280,7 +1280,7 @@ static int gm20b_gr_update_sm_error_state(struct gk20a *g,
1280 GPU_LIT_TPC_IN_GPC_STRIDE); 1280 GPU_LIT_TPC_IN_GPC_STRIDE);
1281 int err = 0; 1281 int err = 0;
1282 1282
1283 mutex_lock(&g->dbg_sessions_lock); 1283 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1284 1284
1285 gr->sm_error_states[sm_id].hww_global_esr = 1285 gr->sm_error_states[sm_id].hww_global_esr =
1286 sm_error_state->hww_global_esr; 1286 sm_error_state->hww_global_esr;
@@ -1336,7 +1336,7 @@ enable_ctxsw:
1336 err = gr_gk20a_enable_ctxsw(g); 1336 err = gr_gk20a_enable_ctxsw(g);
1337 1337
1338fail: 1338fail:
1339 mutex_unlock(&g->dbg_sessions_lock); 1339 nvgpu_mutex_release(&g->dbg_sessions_lock);
1340 return err; 1340 return err;
1341} 1341}
1342 1342
@@ -1351,7 +1351,7 @@ static int gm20b_gr_clear_sm_error_state(struct gk20a *g,
1351 GPU_LIT_TPC_IN_GPC_STRIDE); 1351 GPU_LIT_TPC_IN_GPC_STRIDE);
1352 int err = 0; 1352 int err = 0;
1353 1353
1354 mutex_lock(&g->dbg_sessions_lock); 1354 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1355 1355
1356 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); 1356 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
1357 1357
@@ -1377,7 +1377,7 @@ static int gm20b_gr_clear_sm_error_state(struct gk20a *g,
1377 err = gr_gk20a_enable_ctxsw(g); 1377 err = gr_gk20a_enable_ctxsw(g);
1378 1378
1379fail: 1379fail:
1380 mutex_unlock(&g->dbg_sessions_lock); 1380 nvgpu_mutex_release(&g->dbg_sessions_lock);
1381 return err; 1381 return err;
1382} 1382}
1383 1383
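The conversion in gr_gm20b.c is the pattern the whole patch follows: every mutex_lock()/mutex_unlock() pair becomes nvgpu_mutex_acquire()/nvgpu_mutex_release(), with the locking logic otherwise untouched. A minimal sketch of the kind of Linux backing <nvgpu/lock.h> can provide for these wrappers is shown below; it is an illustration under that assumption, not the literal contents of the new header.

/*
 * Hypothetical sketch of a Linux implementation behind <nvgpu/lock.h>;
 * the header actually introduced by this series may differ in detail.
 */
#include <linux/mutex.h>

struct nvgpu_mutex {
        struct mutex mutex;             /* wrapped Linux mutex */
};

static inline void nvgpu_mutex_init(struct nvgpu_mutex *mutex)
{
        mutex_init(&mutex->mutex);
}

static inline void nvgpu_mutex_acquire(struct nvgpu_mutex *mutex)
{
        mutex_lock(&mutex->mutex);
}

static inline void nvgpu_mutex_release(struct nvgpu_mutex *mutex)
{
        mutex_unlock(&mutex->mutex);
}

Because only the wrapper names change, call sites such as gm20b_gr_record_sm_error_state() above keep their acquire/release structure exactly as before.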
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 3324d3df..11258032 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -120,7 +120,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
120 if (gr->compbit_store.mem.size == 0) 120 if (gr->compbit_store.mem.size == 0)
121 return 0; 121 return 0;
122 122
123 mutex_lock(&g->mm.l2_op_lock); 123 nvgpu_mutex_acquire(&g->mm.l2_op_lock);
124 124
125 if (op == gk20a_cbc_op_clear) { 125 if (op == gk20a_cbc_op_clear) {
126 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), 126 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
@@ -163,7 +163,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
163 } 163 }
164out: 164out:
165 trace_gk20a_ltc_cbc_ctrl_done(dev_name(g->dev)); 165 trace_gk20a_ltc_cbc_ctrl_done(dev_name(g->dev));
166 mutex_unlock(&g->mm.l2_op_lock); 166 nvgpu_mutex_release(&g->mm.l2_op_lock);
167 return err; 167 return err;
168} 168}
169 169
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
index edeb97ec..3b9c444d 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GP106 Clocks 2 * GP106 Clocks
3 * 3 *
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -79,7 +79,7 @@ static int gp106_init_clk_support(struct gk20a *g) {
79 79
80 gk20a_dbg_fn(""); 80 gk20a_dbg_fn("");
81 81
82 mutex_init(&clk->clk_mutex); 82 nvgpu_mutex_init(&clk->clk_mutex);
83 83
84 clk->clk_namemap = (struct namemap_cfg *) 84 clk->clk_namemap = (struct namemap_cfg *)
85 kzalloc(sizeof(struct namemap_cfg) * NUM_NAMEMAPS, GFP_KERNEL); 85 kzalloc(sizeof(struct namemap_cfg) * NUM_NAMEMAPS, GFP_KERNEL);
@@ -169,7 +169,7 @@ static u32 gp106_get_rate_cntr(struct gk20a *g, struct namemap_cfg *c) {
169 if (!c || !c->cntr.reg_ctrl_addr || !c->cntr.reg_cntr_addr) 169 if (!c || !c->cntr.reg_ctrl_addr || !c->cntr.reg_cntr_addr)
170 return 0; 170 return 0;
171 171
172 mutex_lock(&clk->clk_mutex); 172 nvgpu_mutex_acquire(&clk->clk_mutex);
173 173
174 /* Save the register */ 174 /* Save the register */
175 save_reg = gk20a_readl(g, c->cntr.reg_ctrl_addr); 175 save_reg = gk20a_readl(g, c->cntr.reg_ctrl_addr);
@@ -216,7 +216,7 @@ read_err:
216 gk20a_readl(g, c->cntr.reg_ctrl_addr); 216 gk20a_readl(g, c->cntr.reg_ctrl_addr);
217 gk20a_writel(g, c->cntr.reg_ctrl_addr, save_reg); 217 gk20a_writel(g, c->cntr.reg_ctrl_addr, save_reg);
218 gk20a_readl(g, c->cntr.reg_ctrl_addr); 218 gk20a_readl(g, c->cntr.reg_ctrl_addr);
219 mutex_unlock(&clk->clk_mutex); 219 nvgpu_mutex_release(&clk->clk_mutex);
220 220
221 return cntr; 221 return cntr;
222 222
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.h b/drivers/gpu/nvgpu/gp106/clk_gp106.h
index 7df4b974..3c2e31d1 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -16,7 +16,7 @@
16#ifndef CLK_GP106_H 16#ifndef CLK_GP106_H
17#define CLK_GP106_H 17#define CLK_GP106_H
18 18
19#include <linux/mutex.h> 19#include <nvgpu/lock.h>
20 20
21#define CLK_NAMEMAP_INDEX_GPC2CLK 0x00 21#define CLK_NAMEMAP_INDEX_GPC2CLK 0x00
22#define CLK_NAMEMAP_INDEX_XBAR2CLK 0x02 22#define CLK_NAMEMAP_INDEX_XBAR2CLK 0x02
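Headers get the same treatment as the .c files: the direct #include <linux/mutex.h> is replaced by #include <nvgpu/lock.h>, and lock members switch from struct mutex to struct nvgpu_mutex, so nothing outside the lock wrappers depends on Linux lock types. An illustrative header fragment in the converted style follows; the struct and field layout here are hypothetical, not the actual clk_gp106.h/clk_gk20a.h contents.

/* Illustrative only -- hypothetical converted header fragment. */
#include <nvgpu/lock.h>

struct example_clk_state {
        struct nvgpu_mutex clk_mutex;   /* was: struct mutex clk_mutex */
        unsigned int namemap_num;
};

static inline void example_clk_state_init(struct example_clk_state *clk)
{
        nvgpu_mutex_init(&clk->clk_mutex);
}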
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index 9af16886..51e76605 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -114,7 +114,7 @@ void sec2_copy_to_dmem(struct pmu_gk20a *pmu,
114 return; 114 return;
115 } 115 }
116 116
117 mutex_lock(&pmu->pmu_copy_lock); 117 nvgpu_mutex_acquire(&pmu->pmu_copy_lock);
118 118
119 words = size >> 2; 119 words = size >> 2;
120 bytes = size & 0x3; 120 bytes = size & 0x3;
@@ -144,7 +144,7 @@ void sec2_copy_to_dmem(struct pmu_gk20a *pmu,
144 "copy failed. bytes written %d, expected %d", 144 "copy failed. bytes written %d, expected %d",
145 data - dst, size); 145 data - dst, size);
146 } 146 }
147 mutex_unlock(&pmu->pmu_copy_lock); 147 nvgpu_mutex_release(&pmu->pmu_copy_lock);
148 return; 148 return;
149} 149}
150 150
@@ -348,10 +348,10 @@ int init_sec2_setup_hw1(struct gk20a *g,
348 348
349 gk20a_dbg_fn(""); 349 gk20a_dbg_fn("");
350 350
351 mutex_lock(&pmu->isr_mutex); 351 nvgpu_mutex_acquire(&pmu->isr_mutex);
352 g->ops.pmu.reset(g); 352 g->ops.pmu.reset(g);
353 pmu->isr_enabled = true; 353 pmu->isr_enabled = true;
354 mutex_unlock(&pmu->isr_mutex); 354 nvgpu_mutex_release(&pmu->isr_mutex);
355 355
356 data = gk20a_readl(g, psec_fbif_ctl_r()); 356 data = gk20a_readl(g, psec_fbif_ctl_r());
357 data |= psec_fbif_ctl_allow_phys_no_ctx_allow_f(); 357 data |= psec_fbif_ctl_allow_phys_no_ctx_allow_f();
@@ -379,11 +379,11 @@ int init_sec2_setup_hw1(struct gk20a *g,
379 psec_fbif_transcfg_target_noncoherent_sysmem_f()); 379 psec_fbif_transcfg_target_noncoherent_sysmem_f());
380 380
381 /*disable irqs for hs falcon booting as we will poll for halt*/ 381 /*disable irqs for hs falcon booting as we will poll for halt*/
382 mutex_lock(&pmu->isr_mutex); 382 nvgpu_mutex_acquire(&pmu->isr_mutex);
383 pmu_enable_irq(pmu, false); 383 pmu_enable_irq(pmu, false);
384 sec_enable_irq(pmu, false); 384 sec_enable_irq(pmu, false);
385 pmu->isr_enabled = false; 385 pmu->isr_enabled = false;
386 mutex_unlock(&pmu->isr_mutex); 386 nvgpu_mutex_release(&pmu->isr_mutex);
387 err = bl_bootstrap_sec2(pmu, desc, bl_sz); 387 err = bl_bootstrap_sec2(pmu, desc, bl_sz);
388 if (err) 388 if (err)
389 return err; 389 return err;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index e680e753..da121b56 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1997,16 +1997,16 @@ static int gr_gp10b_suspend_contexts(struct gk20a *g,
1997 int local_ctx_resident_ch_fd = -1; 1997 int local_ctx_resident_ch_fd = -1;
1998 bool ctx_resident; 1998 bool ctx_resident;
1999 1999
2000 mutex_lock(&g->dbg_sessions_lock); 2000 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
2001 2001
2002 err = gr_gk20a_disable_ctxsw(g); 2002 err = gr_gk20a_disable_ctxsw(g);
2003 if (err) { 2003 if (err) {
2004 gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw"); 2004 gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw");
2005 mutex_unlock(&g->dbg_sessions_lock); 2005 nvgpu_mutex_release(&g->dbg_sessions_lock);
2006 goto clean_up; 2006 goto clean_up;
2007 } 2007 }
2008 2008
2009 mutex_lock(&dbg_s->ch_list_lock); 2009 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
2010 2010
2011 list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) { 2011 list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) {
2012 ch = g->fifo.channel + ch_data->chid; 2012 ch = g->fifo.channel + ch_data->chid;
@@ -2019,15 +2019,15 @@ static int gr_gp10b_suspend_contexts(struct gk20a *g,
2019 cilp_preempt_pending_ch = ch; 2019 cilp_preempt_pending_ch = ch;
2020 } 2020 }
2021 2021
2022 mutex_unlock(&dbg_s->ch_list_lock); 2022 nvgpu_mutex_release(&dbg_s->ch_list_lock);
2023 2023
2024 err = gr_gk20a_enable_ctxsw(g); 2024 err = gr_gk20a_enable_ctxsw(g);
2025 if (err) { 2025 if (err) {
2026 mutex_unlock(&g->dbg_sessions_lock); 2026 nvgpu_mutex_release(&g->dbg_sessions_lock);
2027 goto clean_up; 2027 goto clean_up;
2028 } 2028 }
2029 2029
2030 mutex_unlock(&g->dbg_sessions_lock); 2030 nvgpu_mutex_release(&g->dbg_sessions_lock);
2031 2031
2032 if (cilp_preempt_pending_ch) { 2032 if (cilp_preempt_pending_ch) {
2033 struct channel_ctx_gk20a *ch_ctx = 2033 struct channel_ctx_gk20a *ch_ctx =
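gr_gp10b_suspend_contexts() shows that nested locking and error paths survive the conversion unchanged: dbg_sessions_lock is still taken before ch_list_lock, and every early exit releases exactly what it acquired. A condensed, hypothetical sketch of that shape is below; the disable_ctxsw/enable_ctxsw parameters stand in for the real gr_gk20a_disable_ctxsw()/gr_gk20a_enable_ctxsw() call chain.

/* Condensed, hypothetical illustration of the locking shape above. */
static int example_suspend_shape(struct gk20a *g,
                                 struct dbg_session_gk20a *dbg_s,
                                 int (*disable_ctxsw)(struct gk20a *g),
                                 int (*enable_ctxsw)(struct gk20a *g))
{
        int err;

        nvgpu_mutex_acquire(&g->dbg_sessions_lock);

        err = disable_ctxsw(g);
        if (err) {
                nvgpu_mutex_release(&g->dbg_sessions_lock);
                return err;
        }

        nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
        /* walk dbg_s->ch_list and suspend resident contexts here */
        nvgpu_mutex_release(&dbg_s->ch_list_lock);

        err = enable_ctxsw(g);

        nvgpu_mutex_release(&g->dbg_sessions_lock);
        return err;
}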
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index b989e6a4..9c565729 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -306,10 +306,10 @@ static int gp10b_init_pmu_setup_hw1(struct gk20a *g)
306 306
307 gk20a_dbg_fn(""); 307 gk20a_dbg_fn("");
308 308
309 mutex_lock(&pmu->isr_mutex); 309 nvgpu_mutex_acquire(&pmu->isr_mutex);
310 pmu_reset(pmu); 310 pmu_reset(pmu);
311 pmu->isr_enabled = true; 311 pmu->isr_enabled = true;
312 mutex_unlock(&pmu->isr_mutex); 312 nvgpu_mutex_release(&pmu->isr_mutex);
313 313
314 /* setup apertures - virtual */ 314 /* setup apertures - virtual */
315 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), 315 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index d5a90c87..16fe2641 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -20,6 +20,7 @@
20#include <linux/debugfs.h> 20#include <linux/debugfs.h>
21#include <linux/seq_file.h> 21#include <linux/seq_file.h>
22#include <linux/platform_device.h> 22#include <linux/platform_device.h>
23#include <nvgpu/lock.h>
23 24
24/* #define ALLOCATOR_DEBUG */ 25/* #define ALLOCATOR_DEBUG */
25 26
@@ -78,7 +79,7 @@ struct nvgpu_allocator_ops {
78 79
79struct nvgpu_allocator { 80struct nvgpu_allocator {
80 char name[32]; 81 char name[32];
81 struct mutex lock; 82 struct nvgpu_mutex lock;
82 83
83 void *priv; 84 void *priv;
84 const struct nvgpu_allocator_ops *ops; 85 const struct nvgpu_allocator_ops *ops;
@@ -167,12 +168,12 @@ struct nvgpu_alloc_carveout {
167 168
168static inline void alloc_lock(struct nvgpu_allocator *a) 169static inline void alloc_lock(struct nvgpu_allocator *a)
169{ 170{
170 mutex_lock(&a->lock); 171 nvgpu_mutex_acquire(&a->lock);
171} 172}
172 173
173static inline void alloc_unlock(struct nvgpu_allocator *a) 174static inline void alloc_unlock(struct nvgpu_allocator *a)
174{ 175{
175 mutex_unlock(&a->lock); 176 nvgpu_mutex_release(&a->lock);
176} 177}
177 178
178/* 179/*
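Keeping alloc_lock()/alloc_unlock() as inline helpers means allocator callers never see the lock type at all; only the helper bodies move to the nvgpu wrappers. A hypothetical caller, using nothing beyond what the header above declares:

/*
 * Hypothetical caller; only alloc_lock(), alloc_unlock() and the priv
 * pointer come from nvgpu/allocator.h -- the callback is illustrative.
 */
#include <nvgpu/allocator.h>

static void example_with_allocator_locked(struct nvgpu_allocator *a,
                                          void (*fn)(void *priv))
{
        alloc_lock(a);          /* now nvgpu_mutex_acquire(&a->lock) */
        fn(a->priv);
        alloc_unlock(a);        /* now nvgpu_mutex_release(&a->lock) */
}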
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index cc4921d3..6f479383 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -84,7 +84,7 @@ struct nvgpu_semaphore_pool {
84 84
85 struct nvgpu_semaphore_sea *sema_sea; /* Sea that owns this pool. */ 85 struct nvgpu_semaphore_sea *sema_sea; /* Sea that owns this pool. */
86 86
87 struct mutex pool_lock; 87 struct nvgpu_mutex pool_lock;
88 88
89 /* 89 /*
90 * This is the address spaces's personal RW table. Other channels will 90 * This is the address spaces's personal RW table. Other channels will
@@ -143,7 +143,7 @@ struct nvgpu_semaphore_sea {
143 */ 143 */
144 DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT); 144 DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);
145 145
146 struct mutex sea_lock; /* Lock alloc/free calls. */ 146 struct nvgpu_mutex sea_lock; /* Lock alloc/free calls. */
147}; 147};
148 148
149/* 149/*
diff --git a/drivers/gpu/nvgpu/lpwr/lpwr.c b/drivers/gpu/nvgpu/lpwr/lpwr.c
index 9636891b..b722a900 100644
--- a/drivers/gpu/nvgpu/lpwr/lpwr.c
+++ b/drivers/gpu/nvgpu/lpwr/lpwr.c
@@ -346,7 +346,7 @@ int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock)
346 346
347 if (pstate_lock) 347 if (pstate_lock)
348 nvgpu_clk_arb_pstate_change_lock(g, true); 348 nvgpu_clk_arb_pstate_change_lock(g, true);
349 mutex_lock(&pmu->pg_mutex); 349 nvgpu_mutex_acquire(&pmu->pg_mutex);
350 350
351 present_pstate = nvgpu_clk_arb_get_current_pstate(g); 351 present_pstate = nvgpu_clk_arb_get_current_pstate(g);
352 352
@@ -367,7 +367,7 @@ int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock)
367 status = gk20a_pmu_enable_elpg(g); 367 status = gk20a_pmu_enable_elpg(g);
368 } 368 }
369 369
370 mutex_unlock(&pmu->pg_mutex); 370 nvgpu_mutex_release(&pmu->pg_mutex);
371 if (pstate_lock) 371 if (pstate_lock)
372 nvgpu_clk_arb_pstate_change_lock(g, false); 372 nvgpu_clk_arb_pstate_change_lock(g, false);
373 373
@@ -386,7 +386,7 @@ int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock)
386 386
387 if (pstate_lock) 387 if (pstate_lock)
388 nvgpu_clk_arb_pstate_change_lock(g, true); 388 nvgpu_clk_arb_pstate_change_lock(g, true);
389 mutex_lock(&pmu->pg_mutex); 389 nvgpu_mutex_acquire(&pmu->pg_mutex);
390 390
391 present_pstate = nvgpu_clk_arb_get_current_pstate(g); 391 present_pstate = nvgpu_clk_arb_get_current_pstate(g);
392 392
@@ -411,7 +411,7 @@ int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock)
411 } 411 }
412 412
413exit_unlock: 413exit_unlock:
414 mutex_unlock(&pmu->pg_mutex); 414 nvgpu_mutex_release(&pmu->pg_mutex);
415 if (pstate_lock) 415 if (pstate_lock)
416 nvgpu_clk_arb_pstate_change_lock(g, false); 416 nvgpu_clk_arb_pstate_change_lock(g, false);
417 417
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index feb8cca8..dcb8464c 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * general p state infrastructure 2 * general p state infrastructure
3 * 3 *
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -333,7 +333,7 @@ static int pstate_sw_setup(struct gk20a *g)
333 gk20a_dbg_fn(""); 333 gk20a_dbg_fn("");
334 334
335 init_waitqueue_head(&g->perf_pmu.pstatesobjs.pstate_notifier_wq); 335 init_waitqueue_head(&g->perf_pmu.pstatesobjs.pstate_notifier_wq);
336 mutex_init(&g->perf_pmu.pstatesobjs.pstate_mutex); 336 nvgpu_mutex_init(&g->perf_pmu.pstatesobjs.pstate_mutex);
337 337
338 err = boardobjgrpconstruct_e32(&g->perf_pmu.pstatesobjs.super); 338 err = boardobjgrpconstruct_e32(&g->perf_pmu.pstatesobjs.super);
339 if (err) { 339 if (err) {
diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h
index af0956e8..22ba98b9 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.h
+++ b/drivers/gpu/nvgpu/pstate/pstate.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * general p state infrastructure 2 * general p state infrastructure
3 * 3 *
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -50,7 +50,7 @@ struct pstates {
50 u32 num_levels; 50 u32 num_levels;
51 wait_queue_head_t pstate_notifier_wq; 51 wait_queue_head_t pstate_notifier_wq;
52 u32 is_pstate_switch_on; 52 u32 is_pstate_switch_on;
53 struct mutex pstate_mutex; /* protect is_pstate_switch_on */ 53 struct nvgpu_mutex pstate_mutex; /* protect is_pstate_switch_on */
54}; 54};
55 55
56int gk20a_init_pstate_support(struct gk20a *g); 56int gk20a_init_pstate_support(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 339f2237..7b6ed322 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Virtualized GPU Fifo 2 * Virtualized GPU Fifo
3 * 3 *
4 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -218,7 +218,7 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
218 goto clean_up_runlist; 218 goto clean_up_runlist;
219 } 219 }
220 } 220 }
221 mutex_init(&runlist->mutex); 221 nvgpu_mutex_init(&runlist->mutex);
222 222
223 /* None of buffers is pinned if this value doesn't change. 223 /* None of buffers is pinned if this value doesn't change.
224 Otherwise, one of them (cur_buffer) must have been pinned. */ 224 Otherwise, one of them (cur_buffer) must have been pinned. */
@@ -294,7 +294,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
294 init_runlist(g, f); 294 init_runlist(g, f);
295 295
296 INIT_LIST_HEAD(&f->free_chs); 296 INIT_LIST_HEAD(&f->free_chs);
297 mutex_init(&f->free_chs_mutex); 297 nvgpu_mutex_init(&f->free_chs_mutex);
298 298
299 for (chid = 0; chid < f->num_channels; chid++) { 299 for (chid = 0; chid < f->num_channels; chid++) {
300 f->channel[chid].userd_iova = 300 f->channel[chid].userd_iova =
@@ -306,10 +306,10 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
306 gk20a_init_channel_support(g, chid); 306 gk20a_init_channel_support(g, chid);
307 gk20a_init_tsg_support(g, chid); 307 gk20a_init_tsg_support(g, chid);
308 } 308 }
309 mutex_init(&f->tsg_inuse_mutex); 309 nvgpu_mutex_init(&f->tsg_inuse_mutex);
310 310
311 f->deferred_reset_pending = false; 311 f->deferred_reset_pending = false;
312 mutex_init(&f->deferred_reset_mutex); 312 nvgpu_mutex_init(&f->deferred_reset_mutex);
313 313
314 f->sw_ready = true; 314 f->sw_ready = true;
315 315
@@ -534,12 +534,12 @@ static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id,
534 534
535 runlist = &f->runlist_info[runlist_id]; 535 runlist = &f->runlist_info[runlist_id];
536 536
537 mutex_lock(&runlist->mutex); 537 nvgpu_mutex_acquire(&runlist->mutex);
538 538
539 ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add, 539 ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
540 wait_for_finish); 540 wait_for_finish);
541 541
542 mutex_unlock(&runlist->mutex); 542 nvgpu_mutex_release(&runlist->mutex);
543 return ret; 543 return ret;
544} 544}
545 545
@@ -679,7 +679,7 @@ static int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
679static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, 679static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g,
680 struct channel_gk20a *ch) 680 struct channel_gk20a *ch)
681{ 681{
682 mutex_lock(&ch->error_notifier_mutex); 682 nvgpu_mutex_acquire(&ch->error_notifier_mutex);
683 if (ch->error_notifier_ref) { 683 if (ch->error_notifier_ref) {
684 if (ch->error_notifier->status == 0xffff) { 684 if (ch->error_notifier->status == 0xffff) {
685 /* If error code is already set, this mmu fault 685 /* If error code is already set, this mmu fault
@@ -691,7 +691,7 @@ static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g,
691 NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); 691 NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
692 } 692 }
693 } 693 }
694 mutex_unlock(&ch->error_notifier_mutex); 694 nvgpu_mutex_release(&ch->error_notifier_mutex);
695 695
696 /* mark channel as faulted */ 696 /* mark channel as faulted */
697 ch->has_timedout = true; 697 ch->has_timedout = true;
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 36bbee00..5c637d25 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -866,7 +866,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
866 gr->g = g; 866 gr->g = g;
867 867
868#if defined(CONFIG_GK20A_CYCLE_STATS) 868#if defined(CONFIG_GK20A_CYCLE_STATS)
869 mutex_init(&g->gr.cs_lock); 869 nvgpu_mutex_init(&g->gr.cs_lock);
870#endif 870#endif
871 871
872 err = vgpu_gr_init_gr_config(g, gr); 872 err = vgpu_gr_init_gr_config(g, gr);
@@ -885,7 +885,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
885 if (err) 885 if (err)
886 goto clean_up; 886 goto clean_up;
887 887
888 mutex_init(&gr->ctx_mutex); 888 nvgpu_mutex_init(&gr->ctx_mutex);
889 889
890 gr->sm_error_states = kzalloc( 890 gr->sm_error_states = kzalloc(
891 sizeof(struct nvgpu_dbg_gpu_sm_error_state_record) * 891 sizeof(struct nvgpu_dbg_gpu_sm_error_state_record) *
@@ -1078,9 +1078,9 @@ static int vgpu_gr_clear_sm_error_state(struct gk20a *g,
1078{ 1078{
1079 struct gr_gk20a *gr = &g->gr; 1079 struct gr_gk20a *gr = &g->gr;
1080 1080
1081 mutex_lock(&g->dbg_sessions_lock); 1081 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1082 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); 1082 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
1083 mutex_unlock(&g->dbg_sessions_lock); 1083 nvgpu_mutex_release(&g->dbg_sessions_lock);
1084 1084
1085 return 0; 1085 return 0;
1086} 1086}
@@ -1096,7 +1096,7 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
1096 return; 1096 return;
1097 } 1097 }
1098 1098
1099 mutex_lock(&g->dbg_sessions_lock); 1099 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1100 1100
1101 sm_error_states = &g->gr.sm_error_states[info->sm_id]; 1101 sm_error_states = &g->gr.sm_error_states[info->sm_id];
1102 1102
@@ -1108,7 +1108,7 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
1108 sm_error_states->hww_warp_esr_report_mask = 1108 sm_error_states->hww_warp_esr_report_mask =
1109 info->hww_warp_esr_report_mask; 1109 info->hww_warp_esr_report_mask;
1110 1110
1111 mutex_unlock(&g->dbg_sessions_lock); 1111 nvgpu_mutex_release(&g->dbg_sessions_lock);
1112} 1112}
1113 1113
1114void vgpu_init_gr_ops(struct gpu_ops *gops) 1114void vgpu_init_gr_ops(struct gpu_ops *gops)
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index c6780cf7..70b50e7e 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -200,7 +200,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
200 int err; 200 int err;
201 201
202 gk20a_dbg_fn(""); 202 gk20a_dbg_fn("");
203 mutex_lock(&vm->update_gmmu_lock); 203 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
204 204
205 /* TBD: add a flag here for the unmap code to recognize teardown 205 /* TBD: add a flag here for the unmap code to recognize teardown
206 * and short-circuit any otherwise expensive operations. */ 206 * and short-circuit any otherwise expensive operations. */
@@ -231,7 +231,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
231 if (nvgpu_alloc_initialized(&vm->user)) 231 if (nvgpu_alloc_initialized(&vm->user))
232 nvgpu_alloc_destroy(&vm->user); 232 nvgpu_alloc_destroy(&vm->user);
233 233
234 mutex_unlock(&vm->update_gmmu_lock); 234 nvgpu_mutex_release(&vm->update_gmmu_lock);
235 235
236 /* vm is not used anymore. release it. */ 236 /* vm is not used anymore. release it. */
237 kfree(vm); 237 kfree(vm);
@@ -401,7 +401,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
401 401
402 vm->mapped_buffers = RB_ROOT; 402 vm->mapped_buffers = RB_ROOT;
403 403
404 mutex_init(&vm->update_gmmu_lock); 404 nvgpu_mutex_init(&vm->update_gmmu_lock);
405 kref_init(&vm->ref); 405 kref_init(&vm->ref);
406 INIT_LIST_HEAD(&vm->reserved_va_list); 406 INIT_LIST_HEAD(&vm->reserved_va_list);
407 407
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index ba387de9..4c88ab96 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -250,9 +250,9 @@ static int vgpu_init_support(struct platform_device *pdev)
250 } 250 }
251 g->bar1_mem = r; 251 g->bar1_mem = r;
252 252
253 mutex_init(&g->dbg_sessions_lock); 253 nvgpu_mutex_init(&g->dbg_sessions_lock);
254 mutex_init(&g->client_lock); 254 nvgpu_mutex_init(&g->client_lock);
255 mutex_init(&g->ch_wdt_lock); 255 nvgpu_mutex_init(&g->ch_wdt_lock);
256 256
257 g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL); 257 g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
258 if (!g->dbg_regops_tmp_buf) { 258 if (!g->dbg_regops_tmp_buf) {
@@ -576,7 +576,7 @@ int vgpu_probe(struct platform_device *pdev)
576 576
577 init_rwsem(&gk20a->busy_lock); 577 init_rwsem(&gk20a->busy_lock);
578 578
579 spin_lock_init(&gk20a->mc_enable_lock); 579 nvgpu_spinlock_init(&gk20a->mc_enable_lock);
580 580
581 /* Initialize the platform interface. */ 581 /* Initialize the platform interface. */
582 err = platform->probe(dev); 582 err = platform->probe(dev);
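vgpu_probe() covers the spinlock half of the change: spin_lock_init() on mc_enable_lock becomes nvgpu_spinlock_init(), mirroring the spinlock include swaps made elsewhere in the series. The matching acquire/release pair does not appear in this hunk; a plausible Linux backing, sketched under the same assumption as the mutex wrappers above, would be:

/*
 * Hypothetical sketch of the spinlock side of <nvgpu/lock.h>; the real
 * header introduced by this series may differ in detail.
 */
#include <linux/spinlock.h>

struct nvgpu_spinlock {
        spinlock_t spinlock;            /* wrapped Linux spinlock */
};

static inline void nvgpu_spinlock_init(struct nvgpu_spinlock *s)
{
        spin_lock_init(&s->spinlock);
}

static inline void nvgpu_spinlock_acquire(struct nvgpu_spinlock *s)
{
        spin_lock(&s->spinlock);
}

static inline void nvgpu_spinlock_release(struct nvgpu_spinlock *s)
{
        spin_unlock(&s->spinlock);
}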