From 8ee3aa4b3175d8d27e57a0f5d5e2cdf3d78a4a58 Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Tue, 24 Jan 2017 19:00:42 +0530
Subject: gpu: nvgpu: use common nvgpu mutex/spinlock APIs

Instead of using Linux APIs for mutexes and spinlocks directly, use the new
APIs defined in <nvgpu/lock.h>.

Replace the Linux-specific mutex/spinlock declaration, init, lock and unlock
APIs with the new APIs, e.g. struct mutex is replaced by struct nvgpu_mutex
and mutex_lock() is replaced by nvgpu_mutex_acquire().

Also include <nvgpu/lock.h> instead of including <linux/mutex.h> and
<linux/spinlock.h>.

Add explicit nvgpu/lock.h includes to the files below to fix compilation
failures:
gk20a/platform_gk20a.h
include/nvgpu/allocator.h

Jira NVGPU-13

Change-Id: I81a05d21ecdbd90c2076a9f0aefd0e40b215bd33
Signed-off-by: Deepak Nibade
Reviewed-on: http://git-master/r/1293187
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/clk/clk_arb.c | 50 +++---
 drivers/gpu/nvgpu/clk/clk_mclk.c | 16 +-
 drivers/gpu/nvgpu/clk/clk_mclk.h | 8 +-
 drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c | 4 +-
 drivers/gpu/nvgpu/common/nvgpu_common.c | 14 +-
 drivers/gpu/nvgpu/common/semaphore.c | 18 +-
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 50 +++---
 drivers/gpu/nvgpu/gk20a/cde_gk20a.h | 4 +-
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 46 +++---
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.h | 6 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 220 ++++++++++++-------------
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 34 ++--
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 12 +-
 drivers/gpu/nvgpu/gk20a/clk_gk20a.c | 38 ++---
 drivers/gpu/nvgpu/gk20a/clk_gk20a.h | 6 +-
 drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 24 +--
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 28 ++--
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 52 +++---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 134 +++++++--------
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h | 4 +-
 drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 2 +-
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 32 ++--
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 50 +++---
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 12 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c | 30 ++--
 drivers/gpu/nvgpu/gk20a/gk20a.h | 20 +--
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 64 +++----
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 12 +-
 drivers/gpu/nvgpu/gk20a/ltc_common.c | 6 +-
 drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 6 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 184 ++++++++++-----------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 12 +-
 drivers/gpu/nvgpu/gk20a/platform_gk20a.h | 4 +-
 drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 4 +-
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 54 +++---
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 12 +-
 drivers/gpu/nvgpu/gk20a/sched_gk20a.c | 80 ++++-----
 drivers/gpu/nvgpu/gk20a/sched_gk20a.h | 8 +-
 drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 11 +-
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 40 ++---
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 4 +-
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 16 +-
 drivers/gpu/nvgpu/gm20b/clk_gm20b.c | 72 ++++----
 drivers/gpu/nvgpu/gm20b/clk_gm20b.h | 4 +-
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 12 +-
 drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 4 +-
 drivers/gpu/nvgpu/gp106/clk_gp106.c | 8 +-
 drivers/gpu/nvgpu/gp106/clk_gp106.h | 4 +-
 drivers/gpu/nvgpu/gp106/sec2_gp106.c | 12 +-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 12 +-
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 4 +-
 drivers/gpu/nvgpu/include/nvgpu/allocator.h | 9 +-
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 4 +-
 drivers/gpu/nvgpu/lpwr/lpwr.c | 8 +-
 drivers/gpu/nvgpu/pstate/pstate.c | 4 +-
 drivers/gpu/nvgpu/pstate/pstate.h | 4 +-
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 18 +-
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 12 +-
drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 6 +- drivers/gpu/nvgpu/vgpu/vgpu.c | 8 +- 60 files changed, 820 insertions(+), 816 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 9232c3dc..062e4e2b 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include "clk/clk_arb.h" @@ -139,10 +139,10 @@ struct nvgpu_clk_arb_target { }; struct nvgpu_clk_arb { - spinlock_t sessions_lock; - spinlock_t users_lock; + struct nvgpu_spinlock sessions_lock; + struct nvgpu_spinlock users_lock; - struct mutex pstate_lock; + struct nvgpu_mutex pstate_lock; struct list_head users; struct list_head sessions; struct llist_head requests; @@ -308,9 +308,9 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) g->clk_arb = arb; arb->g = g; - mutex_init(&arb->pstate_lock); - spin_lock_init(&arb->sessions_lock); - spin_lock_init(&arb->users_lock); + nvgpu_mutex_init(&arb->pstate_lock); + nvgpu_spinlock_init(&arb->sessions_lock); + nvgpu_spinlock_init(&arb->users_lock); err = g->ops.clk_arb.get_arbiter_clk_default(g, CTRL_CLK_DOMAIN_MCLK, &default_mhz); @@ -546,9 +546,9 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, init_llist_head(&session->targets); - spin_lock(&arb->sessions_lock); + nvgpu_spinlock_acquire(&arb->sessions_lock); list_add_tail_rcu(&session->link, &arb->sessions); - spin_unlock(&arb->sessions_lock); + nvgpu_spinlock_release(&arb->sessions_lock); *_session = session; @@ -573,9 +573,9 @@ static void nvgpu_clk_arb_free_session(struct kref *refcount) gk20a_dbg_fn(""); - spin_lock(&arb->sessions_lock); + nvgpu_spinlock_acquire(&arb->sessions_lock); list_del_rcu(&session->link); - spin_unlock(&arb->sessions_lock); + nvgpu_spinlock_release(&arb->sessions_lock); head = llist_del_all(&session->targets); llist_for_each_entry_safe(dev, tmp, head, node) { @@ -622,9 +622,9 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g, dev->arb_queue_head = atomic_read(&arb->notification_queue.head); - spin_lock(&arb->users_lock); + nvgpu_spinlock_acquire(&arb->users_lock); list_add_tail_rcu(&dev->link, &arb->users); - spin_unlock(&arb->users_lock); + nvgpu_spinlock_release(&arb->users_lock); *event_fd = fd; @@ -1128,13 +1128,13 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) /* Program clocks */ /* A change in both mclk of gpc2clk may require a change in voltage */ - mutex_lock(&arb->pstate_lock); + nvgpu_mutex_acquire(&arb->pstate_lock); status = nvgpu_lpwr_disable_pg(g, false); status = clk_pmu_freq_controller_load(g, false); if (status < 0) { arb->status = status; - mutex_unlock(&arb->pstate_lock); + nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); @@ -1143,7 +1143,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); if (status < 0) { arb->status = status; - mutex_unlock(&arb->pstate_lock); + nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); @@ -1155,7 +1155,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) voltuv_sram); if (status < 0) { arb->status = status; - mutex_unlock(&arb->pstate_lock); + nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); @@ -1165,7 +1165,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) status = clk_pmu_freq_controller_load(g, true); if (status < 0) { arb->status = status; - 
mutex_unlock(&arb->pstate_lock); + nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); @@ -1175,7 +1175,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) status = nvgpu_lwpr_mclk_change(g, pstate); if (status < 0) { arb->status = status; - mutex_unlock(&arb->pstate_lock); + nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); @@ -1200,7 +1200,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) status = nvgpu_lpwr_enable_pg(g, false); if (status < 0) { arb->status = status; - mutex_unlock(&arb->pstate_lock); + nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); @@ -1212,7 +1212,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) atomic_inc(&arb->req_nr); /* Unlock pstate change for PG */ - mutex_unlock(&arb->pstate_lock); + nvgpu_mutex_release(&arb->pstate_lock); /* VF Update complete */ nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE)); @@ -1589,9 +1589,9 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode, gk20a_dbg_fn(""); - spin_lock(&arb->users_lock); + nvgpu_spinlock_acquire(&arb->users_lock); list_del_rcu(&dev->link); - spin_unlock(&arb->users_lock); + nvgpu_spinlock_release(&arb->users_lock); synchronize_rcu(); kref_put(&session->refcount, nvgpu_clk_arb_free_session); @@ -2000,9 +2000,9 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) struct nvgpu_clk_arb *arb = g->clk_arb; if (lock) - mutex_lock(&arb->pstate_lock); + nvgpu_mutex_acquire(&arb->pstate_lock); else - mutex_unlock(&arb->pstate_lock); + nvgpu_mutex_release(&arb->pstate_lock); } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c index 815f55ba..c2e9b35c 100644 --- a/drivers/gpu/nvgpu/clk/clk_mclk.c +++ b/drivers/gpu/nvgpu/clk/clk_mclk.c @@ -2185,8 +2185,8 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g) mclk = &g->clk_pmu.clk_mclk; - mutex_init(&mclk->mclk_lock); - mutex_init(&mclk->data_lock); + nvgpu_mutex_init(&mclk->mclk_lock); + nvgpu_mutex_init(&mclk->data_lock); /* FBPA gain WAR */ gk20a_writel(g, fb_fbpa_fbio_iref_byte_rx_ctrl_r(), 0x22222222); @@ -2257,7 +2257,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val) mclk = &g->clk_pmu.clk_mclk; - mutex_lock(&mclk->mclk_lock); + nvgpu_mutex_acquire(&mclk->mclk_lock); if (!mclk->init) goto exit_status; @@ -2364,7 +2364,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val) #ifdef CONFIG_DEBUG_FS g->ops.read_ptimer(g, &t1); - mutex_lock(&mclk->data_lock); + nvgpu_mutex_acquire(&mclk->data_lock); mclk->switch_num++; if (mclk->switch_num == 1) { @@ -2387,11 +2387,11 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val) mclk->switch_std += (curr - mclk->switch_avg) * (curr - prev_avg); } - mutex_unlock(&mclk->data_lock); + nvgpu_mutex_release(&mclk->data_lock); #endif exit_status: - mutex_unlock(&mclk->mclk_lock); + nvgpu_mutex_release(&mclk->mclk_lock); return status; } @@ -2429,13 +2429,13 @@ static int mclk_switch_stats_show(struct seq_file *s, void *unused) mclk = &g->clk_pmu.clk_mclk; /* Make copy of structure to reduce time with lock held */ - mutex_lock(&mclk->data_lock); + nvgpu_mutex_acquire(&mclk->data_lock); std = mclk->switch_std; avg = mclk->switch_avg; max = mclk->switch_max; min = mclk->switch_min; num = mclk->switch_num; - mutex_unlock(&mclk->data_lock); + nvgpu_mutex_release(&mclk->data_lock); tmp = std; do_div(tmp, num); diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h 
index cb7f0de0..731f289d 100644 --- a/drivers/gpu/nvgpu/clk/clk_mclk.h +++ b/drivers/gpu/nvgpu/clk/clk_mclk.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. +* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -14,7 +14,7 @@ #ifndef _CLKMCLK_H_ #define _CLKMCLK_H_ -#include +#include enum gk20a_mclk_speed { gk20a_mclk_low_speed, @@ -24,8 +24,8 @@ enum gk20a_mclk_speed { struct clk_mclk_state { enum gk20a_mclk_speed speed; - struct mutex mclk_lock; - struct mutex data_lock; + struct nvgpu_mutex mclk_lock; + struct nvgpu_mutex data_lock; u16 p5_min; u16 p0_min; diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c index cf8c4569..20209efc 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c @@ -1,7 +1,7 @@ /* * gk20a allocator * - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -140,7 +140,7 @@ int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, a->priv = priv; a->debug = dbg; - mutex_init(&a->lock); + nvgpu_mutex_init(&a->lock); strlcpy(a->name, name, sizeof(a->name)); diff --git a/drivers/gpu/nvgpu/common/nvgpu_common.c b/drivers/gpu/nvgpu/common/nvgpu_common.c index 80f1cca0..6b5cfa55 100644 --- a/drivers/gpu/nvgpu/common/nvgpu_common.c +++ b/drivers/gpu/nvgpu/common/nvgpu_common.c @@ -34,13 +34,13 @@ static void nvgpu_init_vars(struct gk20a *g) init_rwsem(&g->busy_lock); - spin_lock_init(&g->mc_enable_lock); + nvgpu_spinlock_init(&g->mc_enable_lock); - mutex_init(&platform->railgate_lock); - mutex_init(&g->dbg_sessions_lock); - mutex_init(&g->client_lock); - mutex_init(&g->ch_wdt_lock); - mutex_init(&g->poweroff_lock); + nvgpu_mutex_init(&platform->railgate_lock); + nvgpu_mutex_init(&g->dbg_sessions_lock); + nvgpu_mutex_init(&g->client_lock); + nvgpu_mutex_init(&g->ch_wdt_lock); + nvgpu_mutex_init(&g->poweroff_lock); g->regs_saved = g->regs; g->bar1_saved = g->bar1; @@ -52,7 +52,7 @@ static void nvgpu_init_vars(struct gk20a *g) dma_set_max_seg_size(g->dev, UINT_MAX); INIT_LIST_HEAD(&g->pending_sema_waits); - raw_spin_lock_init(&g->pending_sema_waits_lock); + nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock); } static void nvgpu_init_timeout(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index 4bf8695d..919f26ec 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c @@ -24,13 +24,13 @@ #define __lock_sema_sea(s) \ do { \ gpu_sema_verbose_dbg("Acquiring sema lock..."); \ - mutex_lock(&s->sea_lock); \ + nvgpu_mutex_acquire(&s->sea_lock); \ gpu_sema_verbose_dbg("Sema lock aquried!"); \ } while (0) #define __unlock_sema_sea(s) \ do { \ - mutex_unlock(&s->sea_lock); \ + nvgpu_mutex_release(&s->sea_lock); \ gpu_sema_verbose_dbg("Released sema lock"); \ } while (0) @@ -81,7 +81,7 @@ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g) g->sema_sea->page_count = 0; g->sema_sea->gk20a = g; INIT_LIST_HEAD(&g->sema_sea->pool_list); - mutex_init(&g->sema_sea->sea_lock); + nvgpu_mutex_init(&g->sema_sea->sea_lock); if 
(__nvgpu_semaphore_sea_grow(g->sema_sea)) goto cleanup; @@ -138,7 +138,7 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc( p->sema_sea = sea; INIT_LIST_HEAD(&p->hw_semas); kref_init(&p->ref); - mutex_init(&p->pool_lock); + nvgpu_mutex_init(&p->pool_lock); sea->page_count++; list_add(&p->pool_list_entry, &sea->pool_list); @@ -344,7 +344,7 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch) BUG_ON(!p); - mutex_lock(&p->pool_lock); + nvgpu_mutex_acquire(&p->pool_lock); /* Find an available HW semaphore. */ hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced, @@ -371,14 +371,14 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch) list_add(&hw_sema->hw_sema_list, &p->hw_semas); - mutex_unlock(&p->pool_lock); + nvgpu_mutex_release(&p->pool_lock); return 0; fail_free_idx: clear_bit(hw_sema_idx, p->semas_alloced); fail: - mutex_unlock(&p->pool_lock); + nvgpu_mutex_release(&p->pool_lock); return ret; } @@ -391,7 +391,7 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch) BUG_ON(!p); - mutex_lock(&p->pool_lock); + nvgpu_mutex_acquire(&p->pool_lock); clear_bit(ch->hw_sema->idx, p->semas_alloced); @@ -400,7 +400,7 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch) kfree(ch->hw_sema); ch->hw_sema = NULL; - mutex_unlock(&p->pool_lock); + nvgpu_mutex_release(&p->pool_lock); } /* diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index 2a9ad40d..d43bc93f 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -101,9 +101,9 @@ __acquires(&cde_app->mutex) return; if (wait_finish) { - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work); - mutex_lock(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); } else { cancel_delayed_work(&cde_ctx->ctx_deleter_work); } @@ -152,9 +152,9 @@ __releases(&cde_app->mutex) if (!cde_app->initialised) return; - mutex_lock(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); gk20a_cde_stop(g); - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); } void gk20a_cde_suspend(struct gk20a *g) @@ -167,7 +167,7 @@ __releases(&cde_app->mutex) if (!cde_app->initialised) return; - mutex_lock(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); list_for_each_entry_safe(cde_ctx, cde_ctx_save, &cde_app->free_contexts, list) { @@ -179,7 +179,7 @@ __releases(&cde_app->mutex) gk20a_cde_cancel_deleter(cde_ctx, false); } - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); } @@ -739,7 +739,7 @@ __releases(&cde_app->mutex) gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); trace_gk20a_cde_release(cde_ctx); - mutex_lock(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); if (cde_ctx->in_use) { cde_ctx->in_use = false; @@ -749,7 +749,7 @@ __releases(&cde_app->mutex) gk20a_dbg_info("double release cde context %p", cde_ctx); } - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); } static void gk20a_cde_ctx_deleter_fn(struct work_struct *work) @@ -779,7 +779,7 @@ __releases(&cde_app->mutex) return; } - mutex_lock(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); if (cde_ctx->in_use || !cde_app->initialised) { gk20a_dbg(gpu_dbg_cde_ctx, "cde: context use raced, not deleting %p", @@ -797,7 +797,7 @@ __releases(&cde_app->mutex) cde_app->ctx_count_top); out: - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); gk20a_idle(dev); } @@ -876,9 +876,9 @@ __acquires(&cde_app->mutex) break; /* 
exhausted, retry */ - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); cond_resched(); - mutex_lock(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); } while (!nvgpu_timeout_expired(&timeout)); return cde_ctx; @@ -946,7 +946,7 @@ __releases(&cde_app->mutex) scatterbuffer_byte_offset < compbits_byte_offset) return -EINVAL; - mutex_lock(&g->cde_app.mutex); + nvgpu_mutex_acquire(&g->cde_app.mutex); cde_ctx = gk20a_cde_get_context(g); if (IS_ERR(cde_ctx)) { @@ -1118,7 +1118,7 @@ exit_unlock: if (surface) dma_buf_vunmap(compbits_scatter_buf, surface); - mutex_unlock(&g->cde_app.mutex); + nvgpu_mutex_release(&g->cde_app.mutex); return err; } @@ -1155,13 +1155,13 @@ __releases(&cde_app->mutex) "cde: channel had timed out" ", reloading"); /* mark it to be deleted, replace with a new one */ - mutex_lock(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); cde_ctx->is_temporary = true; if (gk20a_cde_create_context(g)) { gk20a_err(cde_ctx->dev, "cde: can't replace context"); } - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); } } @@ -1274,7 +1274,7 @@ __releases(&cde_app->mutex) if (err) return err; - mutex_lock(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); gk20a_cde_stop(g); @@ -1282,7 +1282,7 @@ __releases(&cde_app->mutex) if (!err) cde_app->initialised = true; - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); gk20a_idle(g->dev); return err; @@ -1300,8 +1300,8 @@ __releases(&cde_app->mutex) gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); - mutex_init(&cde_app->mutex); - mutex_lock(&cde_app->mutex); + nvgpu_mutex_init(&cde_app->mutex); + nvgpu_mutex_acquire(&cde_app->mutex); INIT_LIST_HEAD(&cde_app->free_contexts); INIT_LIST_HEAD(&cde_app->used_contexts); @@ -1313,7 +1313,7 @@ __releases(&cde_app->mutex) if (!err) cde_app->initialised = true; - mutex_unlock(&cde_app->mutex); + nvgpu_mutex_release(&cde_app->mutex); gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err); return err; } @@ -1561,7 +1561,7 @@ int gk20a_prepare_compressible_read( missing_bits = (state->valid_compbits ^ request) & request; - mutex_lock(&state->lock); + nvgpu_mutex_acquire(&state->lock); if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { @@ -1599,7 +1599,7 @@ int gk20a_prepare_compressible_read( *zbc_color = state->zbc_color; out: - mutex_unlock(&state->lock); + nvgpu_mutex_release(&state->lock); dma_buf_put(dmabuf); return err; } @@ -1624,7 +1624,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, return err; } - mutex_lock(&state->lock); + nvgpu_mutex_acquire(&state->lock); /* Update the compbits state. */ state->valid_compbits = valid_compbits; @@ -1634,7 +1634,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, gk20a_fence_put(state->fence); state->fence = NULL; - mutex_unlock(&state->lock); + nvgpu_mutex_release(&state->lock); dma_buf_put(dmabuf); return 0; } diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h index 8cdba938..1136b0ad 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h @@ -1,7 +1,7 @@ /* * GK20A color decompression engine support * - * Copyright (c) 2014-2016, NVIDIA Corporation. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -262,7 +262,7 @@ struct gk20a_cde_ctx { struct gk20a_cde_app { bool initialised; - struct mutex mutex; + struct nvgpu_mutex mutex; struct list_head free_contexts; struct list_head used_contexts; diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 023c959e..fd248313 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -107,7 +107,7 @@ static void gk20a_ce_notify_all_user(struct gk20a *g, u32 event) if (!ce_app->initialised) return; - mutex_lock(&ce_app->app_mutex); + nvgpu_mutex_acquire(&ce_app->app_mutex); list_for_each_entry_safe(ce_ctx, ce_ctx_save, &ce_app->allocated_contexts, list) { @@ -117,7 +117,7 @@ static void gk20a_ce_notify_all_user(struct gk20a *g, u32 event) } } - mutex_unlock(&ce_app->app_mutex); + nvgpu_mutex_release(&ce_app->app_mutex); } static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data) @@ -183,14 +183,14 @@ static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_c } } -/* assume this api should need to call under mutex_lock(&ce_app->app_mutex) */ +/* assume this api should need to call under nvgpu_mutex_acquire(&ce_app->app_mutex) */ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx) { struct list_head *list = &ce_ctx->list; ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED; - mutex_lock(&ce_ctx->gpu_ctx_mutex); + nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); if (ce_ctx->cmd_buf_mem.cpu_va) { gk20a_ce_free_command_buffer_stored_fence(ce_ctx); @@ -205,8 +205,8 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx) if (list->prev && list->next) list_del(list); - mutex_unlock(&ce_ctx->gpu_ctx_mutex); - mutex_destroy(&ce_ctx->gpu_ctx_mutex); + nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); + nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex); kfree(ce_ctx); } @@ -353,8 +353,8 @@ int gk20a_init_ce_support(struct gk20a *g) gk20a_dbg(gpu_dbg_fn, "ce: init"); - mutex_init(&ce_app->app_mutex); - mutex_lock(&ce_app->app_mutex); + nvgpu_mutex_init(&ce_app->app_mutex); + nvgpu_mutex_acquire(&ce_app->app_mutex); INIT_LIST_HEAD(&ce_app->allocated_contexts); ce_app->ctx_count = 0; @@ -362,7 +362,7 @@ int gk20a_init_ce_support(struct gk20a *g) ce_app->initialised = true; ce_app->app_state = NVGPU_CE_ACTIVE; - mutex_unlock(&ce_app->app_mutex); + nvgpu_mutex_release(&ce_app->app_mutex); gk20a_dbg(gpu_dbg_cde_ctx, "ce: init finished"); return 0; @@ -379,7 +379,7 @@ void gk20a_ce_destroy(struct gk20a *g) ce_app->app_state = NVGPU_CE_SUSPEND; ce_app->initialised = false; - mutex_lock(&ce_app->app_mutex); + nvgpu_mutex_acquire(&ce_app->app_mutex); list_for_each_entry_safe(ce_ctx, ce_ctx_save, &ce_app->allocated_contexts, list) { @@ -390,8 +390,8 @@ void gk20a_ce_destroy(struct gk20a *g) ce_app->ctx_count = 0; ce_app->next_ctx_id = 0; - mutex_unlock(&ce_app->app_mutex); - mutex_destroy(&ce_app->app_mutex); + nvgpu_mutex_release(&ce_app->app_mutex); + nvgpu_mutex_destroy(&ce_app->app_mutex); } void gk20a_ce_suspend(struct gk20a *g) @@ -428,7 +428,7 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev, if (!ce_ctx) return ctx_id; - mutex_init(&ce_ctx->gpu_ctx_mutex); + nvgpu_mutex_init(&ce_ctx->gpu_ctx_mutex); ce_ctx->g = g; ce_ctx->dev = g->dev; @@ -508,20 +508,20 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev, } } - mutex_lock(&ce_app->app_mutex); + 
nvgpu_mutex_acquire(&ce_app->app_mutex); ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id; list_add(&ce_ctx->list, &ce_app->allocated_contexts); ++ce_app->next_ctx_id; ++ce_app->ctx_count; - mutex_unlock(&ce_app->app_mutex); + nvgpu_mutex_release(&ce_app->app_mutex); ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED; end: if (ctx_id == (u32)~0) { - mutex_lock(&ce_app->app_mutex); + nvgpu_mutex_acquire(&ce_app->app_mutex); gk20a_ce_delete_gpu_context(ce_ctx); - mutex_unlock(&ce_app->app_mutex); + nvgpu_mutex_release(&ce_app->app_mutex); } return ctx_id; @@ -558,7 +558,7 @@ int gk20a_ce_execute_ops(struct device *dev, if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) goto end; - mutex_lock(&ce_app->app_mutex); + nvgpu_mutex_acquire(&ce_app->app_mutex); list_for_each_entry_safe(ce_ctx, ce_ctx_save, &ce_app->allocated_contexts, list) { @@ -568,7 +568,7 @@ int gk20a_ce_execute_ops(struct device *dev, } } - mutex_unlock(&ce_app->app_mutex); + nvgpu_mutex_release(&ce_app->app_mutex); if (!found) { ret = -EINVAL; @@ -580,7 +580,7 @@ int gk20a_ce_execute_ops(struct device *dev, goto end; } - mutex_lock(&ce_ctx->gpu_ctx_mutex); + nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset; @@ -672,7 +672,7 @@ int gk20a_ce_execute_ops(struct device *dev, } else ret = -ENOMEM; noop: - mutex_unlock(&ce_ctx->gpu_ctx_mutex); + nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); end: return ret; } @@ -688,7 +688,7 @@ void gk20a_ce_delete_context(struct device *dev, if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) return; - mutex_lock(&ce_app->app_mutex); + nvgpu_mutex_acquire(&ce_app->app_mutex); list_for_each_entry_safe(ce_ctx, ce_ctx_save, &ce_app->allocated_contexts, list) { @@ -699,7 +699,7 @@ void gk20a_ce_delete_context(struct device *dev, } } - mutex_unlock(&ce_app->app_mutex); + nvgpu_mutex_release(&ce_app->app_mutex); return; } EXPORT_SYMBOL(gk20a_ce_delete_context); diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h index 3b53834d..1bb25dd1 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h @@ -3,7 +3,7 @@ * * GK20A graphics copy engine (gr host) * - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -88,7 +88,7 @@ enum { /* global ce app db */ struct gk20a_ce_app { bool initialised; - struct mutex app_mutex; + struct nvgpu_mutex app_mutex; int app_state; struct list_head allocated_contexts; @@ -101,7 +101,7 @@ struct gk20a_gpu_ctx { struct gk20a *g; struct device *dev; u32 ctx_id; - struct mutex gpu_ctx_mutex; + struct nvgpu_mutex gpu_ctx_mutex; int gpu_ctx_state; ce_event_callback user_event_callback; diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 376a64b0..83a3a523 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -102,7 +102,7 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) platform = gk20a_get_platform(f->g->dev); - mutex_lock(&f->free_chs_mutex); + nvgpu_mutex_acquire(&f->free_chs_mutex); if (!list_empty(&f->free_chs)) { ch = list_first_entry(&f->free_chs, struct channel_gk20a, free_chs); @@ -111,7 +111,7 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) WARN_ON(ch->referenceable); f->used_channels++; } - mutex_unlock(&f->free_chs_mutex); + nvgpu_mutex_release(&f->free_chs_mutex); if (platform->aggressive_sync_destroy_thresh && (f->used_channels > @@ -128,11 +128,11 @@ static void free_channel(struct fifo_gk20a *f, trace_gk20a_release_used_channel(ch->hw_chid); /* refcount is zero here and channel is in a freed/dead state */ - mutex_lock(&f->free_chs_mutex); + nvgpu_mutex_acquire(&f->free_chs_mutex); /* add to head to increase visibility of timing-related bugs */ list_add(&ch->free_chs, &f->free_chs); f->used_channels--; - mutex_unlock(&f->free_chs_mutex); + nvgpu_mutex_release(&f->free_chs_mutex); if (platform->aggressive_sync_destroy_thresh && (f->used_channels < @@ -494,10 +494,10 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) gk20a_channel_cancel_job_clean_up(ch, true); /* ensure no fences are pending */ - mutex_lock(&ch->sync_lock); + nvgpu_mutex_acquire(&ch->sync_lock); if (ch->sync) ch->sync->set_min_eq_max(ch->sync); - mutex_unlock(&ch->sync_lock); + nvgpu_mutex_release(&ch->sync_lock); /* release all job semaphores (applies only to jobs that use semaphore synchronization) */ @@ -595,7 +595,7 @@ void gk20a_disable_channel(struct channel_gk20a *ch) static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch) { /* disable existing cyclestats buffer */ - mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex); + nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); if (ch->cyclestate.cyclestate_buffer_handler) { dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler, ch->cyclestate.cyclestate_buffer); @@ -604,7 +604,7 @@ static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch) ch->cyclestate.cyclestate_buffer = NULL; ch->cyclestate.cyclestate_buffer_size = 0; } - mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex); + nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); } static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, @@ -654,12 +654,12 @@ static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) { int ret; - mutex_lock(&ch->cs_client_mutex); + nvgpu_mutex_acquire(&ch->cs_client_mutex); if (ch->cs_client) ret = gr_gk20a_css_flush(ch, ch->cs_client); else ret = -EBADF; - mutex_unlock(&ch->cs_client_mutex); + nvgpu_mutex_release(&ch->cs_client_mutex); return ret; } @@ -671,7 +671,7 @@ static int 
gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, { int ret; - mutex_lock(&ch->cs_client_mutex); + nvgpu_mutex_acquire(&ch->cs_client_mutex); if (ch->cs_client) { ret = -EEXIST; } else { @@ -681,7 +681,7 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, perfmon_id_start, &ch->cs_client); } - mutex_unlock(&ch->cs_client_mutex); + nvgpu_mutex_release(&ch->cs_client_mutex); return ret; } @@ -690,14 +690,14 @@ static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch) { int ret; - mutex_lock(&ch->cs_client_mutex); + nvgpu_mutex_acquire(&ch->cs_client_mutex); if (ch->cs_client) { ret = gr_gk20a_css_detach(ch, ch->cs_client); ch->cs_client = NULL; } else { ret = 0; } - mutex_unlock(&ch->cs_client_mutex); + nvgpu_mutex_release(&ch->cs_client_mutex); return ret; } @@ -824,9 +824,9 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch, memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification)); /* set channel notifiers pointer */ - mutex_lock(&ch->error_notifier_mutex); + nvgpu_mutex_acquire(&ch->error_notifier_mutex); ch->error_notifier_ref = dmabuf; - mutex_unlock(&ch->error_notifier_mutex); + nvgpu_mutex_release(&ch->error_notifier_mutex); return 0; } @@ -857,14 +857,14 @@ void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error) void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) { - mutex_lock(&ch->error_notifier_mutex); + nvgpu_mutex_acquire(&ch->error_notifier_mutex); gk20a_set_error_notifier_locked(ch, error); - mutex_unlock(&ch->error_notifier_mutex); + nvgpu_mutex_release(&ch->error_notifier_mutex); } static void gk20a_free_error_notifiers(struct channel_gk20a *ch) { - mutex_lock(&ch->error_notifier_mutex); + nvgpu_mutex_acquire(&ch->error_notifier_mutex); if (ch->error_notifier_ref) { dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va); dma_buf_put(ch->error_notifier_ref); @@ -872,7 +872,7 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch) ch->error_notifier = NULL; ch->error_notifier_va = NULL; } - mutex_unlock(&ch->error_notifier_mutex); + nvgpu_mutex_release(&ch->error_notifier_mutex); } static void gk20a_wait_until_counter_is_N( @@ -927,16 +927,16 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) nvgpu_wait_for_deferred_interrupts(g); /* prevent new refs */ - spin_lock(&ch->ref_obtain_lock); + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); if (!ch->referenceable) { - spin_unlock(&ch->ref_obtain_lock); + nvgpu_spinlock_release(&ch->ref_obtain_lock); gk20a_err(dev_from_gk20a(ch->g), "Extra %s() called to channel %u", __func__, ch->hw_chid); return; } ch->referenceable = false; - spin_unlock(&ch->ref_obtain_lock); + nvgpu_spinlock_release(&ch->ref_obtain_lock); /* matches with the initial reference in gk20a_open_new_channel() */ atomic_dec(&ch->ref_count); @@ -948,18 +948,18 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) __func__, "references"); /* if engine reset was deferred, perform it now */ - mutex_lock(&f->deferred_reset_mutex); + nvgpu_mutex_acquire(&f->deferred_reset_mutex); if (g->fifo.deferred_reset_pending) { gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" " deferred, running now"); /* if lock is already taken, a reset is taking place so no need to repeat */ - if (mutex_trylock(&g->fifo.gr_reset_mutex)) { + if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) { gk20a_fifo_deferred_reset(g, ch); - mutex_unlock(&g->fifo.gr_reset_mutex); + nvgpu_mutex_release(&g->fifo.gr_reset_mutex); } } - 
mutex_unlock(&f->deferred_reset_mutex); + nvgpu_mutex_release(&f->deferred_reset_mutex); if (!gk20a_channel_as_bound(ch)) goto unbind; @@ -991,12 +991,12 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) channel_gk20a_free_priv_cmdbuf(ch); /* sync must be destroyed before releasing channel vm */ - mutex_lock(&ch->sync_lock); + nvgpu_mutex_acquire(&ch->sync_lock); if (ch->sync) { gk20a_channel_sync_destroy(ch->sync); ch->sync = NULL; } - mutex_unlock(&ch->sync_lock); + nvgpu_mutex_release(&ch->sync_lock); /* * free the channel used semaphore index. @@ -1011,10 +1011,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) */ gk20a_vm_put(ch_vm); - spin_lock(&ch->update_fn_lock); + nvgpu_spinlock_acquire(&ch->update_fn_lock); ch->update_fn = NULL; ch->update_fn_data = NULL; - spin_unlock(&ch->update_fn_lock); + nvgpu_spinlock_release(&ch->update_fn_lock); cancel_work_sync(&ch->update_fn_work); cancel_delayed_work_sync(&ch->clean_up.wq); cancel_delayed_work_sync(&ch->timeout.wq); @@ -1037,21 +1037,21 @@ unbind: WARN_ON(ch->sync); /* unlink all debug sessions */ - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); list_for_each_entry_safe(session_data, tmp_s, &ch->dbg_s_list, dbg_s_entry) { dbg_s = session_data->dbg_s; - mutex_lock(&dbg_s->ch_list_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, ch_entry) { if (ch_data->chid == ch->hw_chid) dbg_unbind_single_channel_gk20a(dbg_s, ch_data); } - mutex_unlock(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); } - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); /* free pre-allocated resources, if applicable */ if (channel_gk20a_is_prealloc_enabled(ch)) @@ -1079,7 +1079,7 @@ static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch) unsigned long prev_jiffies = 0; struct device *dev = dev_from_gk20a(ch->g); - spin_lock(&ch->ref_actions_lock); + nvgpu_spinlock_acquire(&ch->ref_actions_lock); dev_info(dev, "ch %d: refs %d. 
Actions, most recent last:\n", ch->hw_chid, atomic_read(&ch->ref_count)); @@ -1109,7 +1109,7 @@ static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch) get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; } - spin_unlock(&ch->ref_actions_lock); + nvgpu_spinlock_release(&ch->ref_actions_lock); #endif } @@ -1119,7 +1119,7 @@ static void gk20a_channel_save_ref_source(struct channel_gk20a *ch, #if GK20A_CHANNEL_REFCOUNT_TRACKING struct channel_gk20a_ref_action *act; - spin_lock(&ch->ref_actions_lock); + nvgpu_spinlock_acquire(&ch->ref_actions_lock); act = &ch->ref_actions[ch->ref_actions_put]; act->type = type; @@ -1132,7 +1132,7 @@ static void gk20a_channel_save_ref_source(struct channel_gk20a *ch, ch->ref_actions_put = (ch->ref_actions_put + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; - spin_unlock(&ch->ref_actions_lock); + nvgpu_spinlock_release(&ch->ref_actions_lock); #endif } @@ -1152,7 +1152,7 @@ struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, const char *caller) { struct channel_gk20a *ret; - spin_lock(&ch->ref_obtain_lock); + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); if (likely(ch->referenceable)) { gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); @@ -1161,7 +1161,7 @@ struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, } else ret = NULL; - spin_unlock(&ch->ref_obtain_lock); + nvgpu_spinlock_release(&ch->ref_obtain_lock); if (ret) trace_gk20a_channel_get(ch->hw_chid, caller); @@ -1250,10 +1250,10 @@ static void gk20a_channel_update_runcb_fn(struct work_struct *work) void (*update_fn)(struct channel_gk20a *, void *); void *update_fn_data; - spin_lock(&ch->update_fn_lock); + nvgpu_spinlock_acquire(&ch->update_fn_lock); update_fn = ch->update_fn; update_fn_data = ch->update_fn_data; - spin_unlock(&ch->update_fn_lock); + nvgpu_spinlock_release(&ch->update_fn_lock); if (update_fn) update_fn(ch, update_fn_data); @@ -1268,10 +1268,10 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel); if (ch) { - spin_lock(&ch->update_fn_lock); + nvgpu_spinlock_acquire(&ch->update_fn_lock); ch->update_fn = update_fn; ch->update_fn_data = update_fn_data; - spin_unlock(&ch->update_fn_lock); + nvgpu_spinlock_release(&ch->update_fn_lock); } return ch; @@ -1325,13 +1325,13 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, ch->tgid = current->tgid; /* process granularity for FECS traces */ /* unhook all events created on this channel */ - mutex_lock(&ch->event_id_list_lock); + nvgpu_mutex_acquire(&ch->event_id_list_lock); list_for_each_entry_safe(event_id_data, event_id_data_temp, &ch->event_id_list, event_id_node) { list_del_init(&event_id_data->event_id_node); } - mutex_unlock(&ch->event_id_list_lock); + nvgpu_mutex_release(&ch->event_id_list_lock); /* By default, channel is regular (non-TSG) channel */ ch->tsgid = NVGPU_INVALID_TSG_ID; @@ -1357,7 +1357,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, ch->update_fn = NULL; ch->update_fn_data = NULL; - spin_lock_init(&ch->update_fn_lock); + nvgpu_spinlock_init(&ch->update_fn_lock); INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn); /* Mark the channel alive, get-able, with 1 initial use @@ -1652,17 +1652,17 @@ static void channel_gk20a_free_job(struct channel_gk20a *c, void channel_gk20a_joblist_lock(struct channel_gk20a *c) { if (channel_gk20a_is_prealloc_enabled(c)) - mutex_lock(&c->joblist.pre_alloc.read_lock); + 
nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock); else - spin_lock(&c->joblist.dynamic.lock); + nvgpu_spinlock_acquire(&c->joblist.dynamic.lock); } void channel_gk20a_joblist_unlock(struct channel_gk20a *c) { if (channel_gk20a_is_prealloc_enabled(c)) - mutex_unlock(&c->joblist.pre_alloc.read_lock); + nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock); else - spin_unlock(&c->joblist.dynamic.lock); + nvgpu_spinlock_release(&c->joblist.dynamic.lock); } static struct channel_gk20a_job *channel_gk20a_joblist_peek( @@ -1871,14 +1871,14 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, channel_gk20a_setup_userd(c); if (!platform->aggressive_sync_destroy_thresh) { - mutex_lock(&c->sync_lock); + nvgpu_mutex_acquire(&c->sync_lock); c->sync = gk20a_channel_sync_create(c); if (!c->sync) { err = -ENOMEM; - mutex_unlock(&c->sync_lock); + nvgpu_mutex_release(&c->sync_lock); goto clean_up_unmap; } - mutex_unlock(&c->sync_lock); + nvgpu_mutex_release(&c->sync_lock); if (g->ops.fifo.resetup_ramfc) { err = g->ops.fifo.resetup_ramfc(c); @@ -2085,16 +2085,16 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch) if (!ch->wdt_enabled) return; - raw_spin_lock(&ch->timeout.lock); + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); if (ch->timeout.initialized) { - raw_spin_unlock(&ch->timeout.lock); + nvgpu_raw_spinlock_release(&ch->timeout.lock); return; } ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch); ch->timeout.initialized = true; - raw_spin_unlock(&ch->timeout.lock); + nvgpu_raw_spinlock_release(&ch->timeout.lock); schedule_delayed_work(&ch->timeout.wq, msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch))); @@ -2102,18 +2102,18 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch) static void gk20a_channel_timeout_stop(struct channel_gk20a *ch) { - raw_spin_lock(&ch->timeout.lock); + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); if (!ch->timeout.initialized) { - raw_spin_unlock(&ch->timeout.lock); + nvgpu_raw_spinlock_release(&ch->timeout.lock); return; } - raw_spin_unlock(&ch->timeout.lock); + nvgpu_raw_spinlock_release(&ch->timeout.lock); cancel_delayed_work_sync(&ch->timeout.wq); - raw_spin_lock(&ch->timeout.lock); + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); ch->timeout.initialized = false; - raw_spin_unlock(&ch->timeout.lock); + nvgpu_raw_spinlock_release(&ch->timeout.lock); } void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) @@ -2125,13 +2125,13 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) struct channel_gk20a *ch = &f->channel[chid]; if (gk20a_channel_get(ch)) { - raw_spin_lock(&ch->timeout.lock); + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); if (!ch->timeout.initialized) { - raw_spin_unlock(&ch->timeout.lock); + nvgpu_raw_spinlock_release(&ch->timeout.lock); gk20a_channel_put(ch); continue; } - raw_spin_unlock(&ch->timeout.lock); + nvgpu_raw_spinlock_release(&ch->timeout.lock); cancel_delayed_work_sync(&ch->timeout.wq); if (!ch->has_timedout) @@ -2164,13 +2164,13 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) } /* Need global lock since multiple channels can timeout at a time */ - mutex_lock(&g->ch_wdt_lock); + nvgpu_mutex_acquire(&g->ch_wdt_lock); /* Get timed out job and reset the timer */ - raw_spin_lock(&ch->timeout.lock); + nvgpu_raw_spinlock_acquire(&ch->timeout.lock); gp_get = ch->timeout.gp_get; ch->timeout.initialized = false; - raw_spin_unlock(&ch->timeout.lock); + nvgpu_raw_spinlock_release(&ch->timeout.lock); if (gk20a_userd_gp_get(ch->g, ch) != gp_get) { 
gk20a_channel_timeout_start(ch); @@ -2187,7 +2187,7 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); fail_unlock: - mutex_unlock(&g->ch_wdt_lock); + nvgpu_mutex_release(&g->ch_wdt_lock); gk20a_channel_put(ch); gk20a_idle(dev_from_gk20a(g)); } @@ -2216,17 +2216,17 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c) { - mutex_lock(&c->clean_up.lock); + nvgpu_mutex_acquire(&c->clean_up.lock); if (c->clean_up.scheduled) { - mutex_unlock(&c->clean_up.lock); + nvgpu_mutex_release(&c->clean_up.lock); return; } c->clean_up.scheduled = true; schedule_delayed_work(&c->clean_up.wq, 1); - mutex_unlock(&c->clean_up.lock); + nvgpu_mutex_release(&c->clean_up.lock); } static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c, @@ -2235,9 +2235,9 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c, if (wait_for_completion) cancel_delayed_work_sync(&c->clean_up.wq); - mutex_lock(&c->clean_up.lock); + nvgpu_mutex_acquire(&c->clean_up.lock); c->clean_up.scheduled = false; - mutex_unlock(&c->clean_up.lock); + nvgpu_mutex_release(&c->clean_up.lock); } static int gk20a_channel_add_job(struct channel_gk20a *c, @@ -2353,13 +2353,13 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, c->sync->signal_timeline(c->sync); if (platform->aggressive_sync_destroy_thresh) { - mutex_lock(&c->sync_lock); + nvgpu_mutex_acquire(&c->sync_lock); if (atomic_dec_and_test(&c->sync->refcount) && platform->aggressive_sync_destroy) { gk20a_channel_sync_destroy(c->sync); c->sync = NULL; } - mutex_unlock(&c->sync_lock); + nvgpu_mutex_release(&c->sync_lock); } } @@ -2563,18 +2563,18 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, need_sync_fence = true; if (platform->aggressive_sync_destroy_thresh) { - mutex_lock(&c->sync_lock); + nvgpu_mutex_acquire(&c->sync_lock); if (!c->sync) { c->sync = gk20a_channel_sync_create(c); if (!c->sync) { err = -ENOMEM; - mutex_unlock(&c->sync_lock); + nvgpu_mutex_release(&c->sync_lock); goto fail; } new_sync_created = true; } atomic_inc(&c->sync->refcount); - mutex_unlock(&c->sync_lock); + nvgpu_mutex_release(&c->sync_lock); } if (g->ops.fifo.resetup_ramfc && new_sync_created) { @@ -2920,31 +2920,31 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) c->g = NULL; c->hw_chid = chid; atomic_set(&c->bound, false); - spin_lock_init(&c->ref_obtain_lock); + nvgpu_spinlock_init(&c->ref_obtain_lock); atomic_set(&c->ref_count, 0); c->referenceable = false; init_waitqueue_head(&c->ref_count_dec_wq); #if GK20A_CHANNEL_REFCOUNT_TRACKING - spin_lock_init(&c->ref_actions_lock); + nvgpu_spinlock_init(&c->ref_actions_lock); #endif - mutex_init(&c->ioctl_lock); - mutex_init(&c->error_notifier_mutex); - spin_lock_init(&c->joblist.dynamic.lock); - mutex_init(&c->joblist.pre_alloc.read_lock); - raw_spin_lock_init(&c->timeout.lock); - mutex_init(&c->sync_lock); + nvgpu_mutex_init(&c->ioctl_lock); + nvgpu_mutex_init(&c->error_notifier_mutex); + nvgpu_spinlock_init(&c->joblist.dynamic.lock); + nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); + nvgpu_raw_spinlock_init(&c->timeout.lock); + nvgpu_mutex_init(&c->sync_lock); INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn); - mutex_init(&c->clean_up.lock); + nvgpu_mutex_init(&c->clean_up.lock); INIT_LIST_HEAD(&c->joblist.dynamic.jobs); #if 
defined(CONFIG_GK20A_CYCLE_STATS) - mutex_init(&c->cyclestate.cyclestate_buffer_mutex); - mutex_init(&c->cs_client_mutex); + nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); + nvgpu_mutex_init(&c->cs_client_mutex); #endif INIT_LIST_HEAD(&c->dbg_s_list); INIT_LIST_HEAD(&c->event_id_list); - mutex_init(&c->event_id_list_lock); - mutex_init(&c->dbg_s_lock); + nvgpu_mutex_init(&c->event_id_list_lock); + nvgpu_mutex_init(&c->dbg_s_lock); list_add(&c->free_chs, &g->fifo.free_chs); return 0; @@ -3102,7 +3102,7 @@ static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) poll_wait(filep, &event_id_data->event_id_wq, wait); - mutex_lock(&event_id_data->lock); + nvgpu_mutex_acquire(&event_id_data->lock); if (event_id_data->is_tsg) { struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; @@ -3127,7 +3127,7 @@ static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) } } - mutex_unlock(&event_id_data->lock); + nvgpu_mutex_release(&event_id_data->lock); return mask; } @@ -3140,15 +3140,15 @@ static int gk20a_event_id_release(struct inode *inode, struct file *filp) if (event_id_data->is_tsg) { struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; - mutex_lock(&tsg->event_id_list_lock); + nvgpu_mutex_acquire(&tsg->event_id_list_lock); list_del_init(&event_id_data->event_id_node); - mutex_unlock(&tsg->event_id_list_lock); + nvgpu_mutex_release(&tsg->event_id_list_lock); } else { struct channel_gk20a *ch = g->fifo.channel + event_id_data->id; - mutex_lock(&ch->event_id_list_lock); + nvgpu_mutex_acquire(&ch->event_id_list_lock); list_del_init(&event_id_data->event_id_node); - mutex_unlock(&ch->event_id_list_lock); + nvgpu_mutex_release(&ch->event_id_list_lock); } kfree(event_id_data); @@ -3170,7 +3170,7 @@ static int gk20a_channel_get_event_data_from_id(struct channel_gk20a *ch, struct gk20a_event_id_data *local_event_id_data; bool event_found = false; - mutex_lock(&ch->event_id_list_lock); + nvgpu_mutex_acquire(&ch->event_id_list_lock); list_for_each_entry(local_event_id_data, &ch->event_id_list, event_id_node) { if (local_event_id_data->event_id == event_id) { @@ -3178,7 +3178,7 @@ static int gk20a_channel_get_event_data_from_id(struct channel_gk20a *ch, break; } } - mutex_unlock(&ch->event_id_list_lock); + nvgpu_mutex_release(&ch->event_id_list_lock); if (event_found) { *event_id_data = local_event_id_data; @@ -3199,7 +3199,7 @@ void gk20a_channel_event_id_post_event(struct channel_gk20a *ch, if (err) return; - mutex_lock(&event_id_data->lock); + nvgpu_mutex_acquire(&event_id_data->lock); gk20a_dbg_info( "posting event for event_id=%d on ch=%d\n", @@ -3208,7 +3208,7 @@ void gk20a_channel_event_id_post_event(struct channel_gk20a *ch, wake_up_interruptible_all(&event_id_data->event_id_wq); - mutex_unlock(&event_id_data->lock); + nvgpu_mutex_release(&event_id_data->lock); } static int gk20a_channel_event_id_enable(struct channel_gk20a *ch, @@ -3253,12 +3253,12 @@ static int gk20a_channel_event_id_enable(struct channel_gk20a *ch, event_id_data->event_id = event_id; init_waitqueue_head(&event_id_data->event_id_wq); - mutex_init(&event_id_data->lock); + nvgpu_mutex_init(&event_id_data->lock); INIT_LIST_HEAD(&event_id_data->event_id_node); - mutex_lock(&ch->event_id_list_lock); + nvgpu_mutex_acquire(&ch->event_id_list_lock); list_add_tail(&event_id_data->event_id_node, &ch->event_id_list); - mutex_unlock(&ch->event_id_list_lock); + nvgpu_mutex_release(&ch->event_id_list_lock); fd_install(local_fd, file); file->private_data = event_id_data; @@ -3569,7 +3569,7 
@@ long gk20a_channel_ioctl(struct file *filp, /* protect our sanity for threaded userspace - most of the channel is * not thread safe */ - mutex_lock(&ch->ioctl_lock); + nvgpu_mutex_acquire(&ch->ioctl_lock); /* this ioctl call keeps a ref to the file which keeps a ref to the * channel */ @@ -3660,12 +3660,12 @@ long gk20a_channel_ioctl(struct file *filp, /* waiting is thread-safe, not dropping this mutex could * deadlock in certain conditions */ - mutex_unlock(&ch->ioctl_lock); + nvgpu_mutex_release(&ch->ioctl_lock); err = gk20a_channel_wait(ch, (struct nvgpu_wait_args *)buf); - mutex_lock(&ch->ioctl_lock); + nvgpu_mutex_acquire(&ch->ioctl_lock); gk20a_idle(dev); break; @@ -3899,7 +3899,7 @@ long gk20a_channel_ioctl(struct file *filp, if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); - mutex_unlock(&ch->ioctl_lock); + nvgpu_mutex_release(&ch->ioctl_lock); gk20a_channel_put(ch); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index f940a271..14ee9f69 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -19,15 +19,15 @@ #define CHANNEL_GK20A_H #include -#include #include #include #include -#include #include #include #include +#include + struct gk20a; struct gr_gk20a; struct dbg_session_gk20a; @@ -80,18 +80,18 @@ struct channel_gk20a_joblist { unsigned int put; unsigned int get; struct channel_gk20a_job *jobs; - struct mutex read_lock; + struct nvgpu_mutex read_lock; } pre_alloc; struct { struct list_head jobs; - spinlock_t lock; + struct nvgpu_spinlock lock; } dynamic; }; struct channel_gk20a_timeout { struct delayed_work wq; - raw_spinlock_t lock; + struct nvgpu_raw_spinlock lock; bool initialized; u32 gp_get; }; @@ -106,12 +106,12 @@ struct gk20a_event_id_data { bool event_posted; wait_queue_head_t event_id_wq; - struct mutex lock; + struct nvgpu_mutex lock; struct list_head event_id_node; }; struct channel_gk20a_clean_up { - struct mutex lock; + struct nvgpu_mutex lock; bool scheduled; struct delayed_work wq; }; @@ -156,7 +156,7 @@ struct channel_gk20a { struct list_head free_chs; - spinlock_t ref_obtain_lock; + struct nvgpu_spinlock ref_obtain_lock; bool referenceable; atomic_t ref_count; wait_queue_head_t ref_count_dec_wq; @@ -169,7 +169,7 @@ struct channel_gk20a { struct channel_gk20a_ref_action ref_actions[ GK20A_CHANNEL_REFCOUNT_TRACKING]; size_t ref_actions_put; /* index of next write */ - spinlock_t ref_actions_lock; + struct nvgpu_spinlock ref_actions_lock; #endif struct nvgpu_semaphore_int *hw_sema; @@ -183,7 +183,7 @@ struct channel_gk20a { bool cde; pid_t pid; pid_t tgid; - struct mutex ioctl_lock; + struct nvgpu_mutex ioctl_lock; int tsgid; struct list_head ch_entry; /* channel's entry in TSG */ @@ -221,17 +221,17 @@ struct channel_gk20a { void *cyclestate_buffer; u32 cyclestate_buffer_size; struct dma_buf *cyclestate_buffer_handler; - struct mutex cyclestate_buffer_mutex; + struct nvgpu_mutex cyclestate_buffer_mutex; } cyclestate; - struct mutex cs_client_mutex; + struct nvgpu_mutex cs_client_mutex; struct gk20a_cs_snapshot_client *cs_client; #endif - struct mutex dbg_s_lock; + struct nvgpu_mutex dbg_s_lock; struct list_head dbg_s_list; struct list_head event_id_list; - struct mutex event_id_list_lock; + struct nvgpu_mutex event_id_list_lock; bool has_timedout; u32 timeout_ms_max; @@ -241,9 +241,9 @@ struct channel_gk20a { struct dma_buf *error_notifier_ref; struct nvgpu_notification *error_notifier; void 
*error_notifier_va; - struct mutex error_notifier_mutex; + struct nvgpu_mutex error_notifier_mutex; - struct mutex sync_lock; + struct nvgpu_mutex sync_lock; struct gk20a_channel_sync *sync; #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION @@ -254,7 +254,7 @@ struct channel_gk20a { * via schedule_work */ void (*update_fn)(struct channel_gk20a *, void *); void *update_fn_data; - spinlock_t update_fn_lock; /* make access to the two above atomic */ + struct nvgpu_spinlock update_fn_lock; /* make access to the two above atomic */ struct work_struct update_fn_work; u32 interleave_level; diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 18971b09..097635a7 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -414,9 +414,9 @@ struct wait_fence_work { static void gk20a_add_pending_sema_wait(struct gk20a *g, struct wait_fence_work *work) { - raw_spin_lock(&g->pending_sema_waits_lock); + nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock); list_add(&work->entry, &g->pending_sema_waits); - raw_spin_unlock(&g->pending_sema_waits_lock); + nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock); } /* @@ -426,9 +426,9 @@ static void gk20a_add_pending_sema_wait(struct gk20a *g, static void gk20a_start_sema_wait_cancel(struct gk20a *g, struct list_head *list) { - raw_spin_lock(&g->pending_sema_waits_lock); + nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock); list_replace_init(&g->pending_sema_waits, list); - raw_spin_unlock(&g->pending_sema_waits_lock); + nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock); } /* @@ -486,10 +486,10 @@ static void gk20a_channel_semaphore_launcher( * This spinlock must protect a _very_ small critical section - * otherwise it's possible that the deterministic submit path suffers. */ - raw_spin_lock(&g->pending_sema_waits_lock); + nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock); if (!list_empty(&g->pending_sema_waits)) list_del_init(&w->entry); - raw_spin_unlock(&g->pending_sema_waits_lock); + nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock); gk20a_dbg_info("waiting for pre fence %p '%s'", fence, fence->name); diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c index 32690c90..38d13b4b 100644 --- a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c @@ -1,7 +1,7 @@ /* * GK20A Clocks * - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -457,7 +457,7 @@ static int gk20a_init_clk_setup_sw(struct gk20a *g) clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL]; } - mutex_init(&clk->clk_mutex); + nvgpu_mutex_init(&clk->clk_mutex); clk->sw_ready = true; @@ -538,14 +538,14 @@ static int gk20a_clk_export_set_rate(void *data, unsigned long *rate) struct clk_gk20a *clk = &g->clk; if (rate) { - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); old_freq = clk->gpc_pll.freq; ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq); if (!ret && clk->gpc_pll.enabled) ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq); if (!ret) *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); } return ret; } @@ -556,9 +556,9 @@ static int gk20a_clk_export_enable(void *data) struct gk20a *g = data; struct clk_gk20a *clk = &g->clk; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); return ret; } @@ -567,10 +567,10 @@ static void gk20a_clk_export_disable(void *data) struct gk20a *g = data; struct clk_gk20a *clk = &g->clk; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); if (g->clk.clk_hw_on) clk_disable_gpcpll(g, 1); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); } static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state) @@ -578,12 +578,12 @@ static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state) struct gk20a *g = data; struct clk_gk20a *clk = &g->clk; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); if (state) *state = clk->gpc_pll.enabled; if (rate) *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); } static struct tegra_clk_export_ops gk20a_clk_export_ops = { @@ -640,11 +640,11 @@ static int gk20a_init_clk_support(struct gk20a *g) if (err) return err; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); clk->clk_hw_on = true; err = gk20a_init_clk_setup_hw(g); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); if (err) return err; @@ -658,9 +658,9 @@ static int gk20a_init_clk_support(struct gk20a *g) return err; /* The prev call may not enable PLL if gbus is unbalanced - force it */ - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); if (err) return err; @@ -680,10 +680,10 @@ static int gk20a_suspend_clk_support(struct gk20a *g) clk_disable(g->clk.tegra_clk); /* The prev call may not disable PLL if gbus is unbalanced - force it */ - mutex_lock(&g->clk.clk_mutex); + nvgpu_mutex_acquire(&g->clk.clk_mutex); ret = clk_disable_gpcpll(g, 1); g->clk.clk_hw_on = false; - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return ret; } @@ -714,10 +714,10 @@ static int pll_reg_show(struct seq_file *s, void *data) struct gk20a *g = s->private; u32 reg, m, n, pl, f; - mutex_lock(&g->clk.clk_mutex); + nvgpu_mutex_acquire(&g->clk.clk_mutex); if (!g->clk.clk_hw_on) { seq_printf(s, "gk20a powered down - no access to registers\n"); - mutex_unlock(&g->clk.clk_mutex); + 
nvgpu_mutex_release(&g->clk.clk_mutex); return 0; } @@ -733,7 +733,7 @@ static int pll_reg_show(struct seq_file *s, void *data) f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]); seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return 0; } diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.h b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h index a45dfcb7..8260fd4a 100644 --- a/drivers/gpu/nvgpu/gk20a/clk_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 - 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011 - 2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -16,7 +16,7 @@ #ifndef CLK_GK20A_H #define CLK_GK20A_H -#include +#include #include #include @@ -86,7 +86,7 @@ struct clk_gk20a { #endif struct pll gpc_pll; struct pll gpc_pll_last; - struct mutex clk_mutex; + struct nvgpu_mutex clk_mutex; struct namemap_cfg *clk_namemap; u32 namemap_num; u32 *namemap_xlat_table; diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index aa92796c..4bc7ee52 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c @@ -1,7 +1,7 @@ /* * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). * - * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include "gk20a.h" @@ -557,7 +557,7 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch, gr = &g->gr; *cs_client = NULL; - mutex_lock(&gr->cs_lock); + nvgpu_mutex_acquire(&gr->cs_lock); ret = css_gr_create_shared_data(gr); if (ret) @@ -577,7 +577,7 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch, if (perfmon_start) *perfmon_start = (*cs_client)->perfmon_start; - mutex_unlock(&gr->cs_lock); + nvgpu_mutex_release(&gr->cs_lock); return 0; @@ -591,7 +591,7 @@ failed: if (list_empty(&gr->cs_data->clients)) css_gr_free_shared_data(gr); } - mutex_unlock(&gr->cs_lock); + nvgpu_mutex_release(&gr->cs_lock); if (perfmon_start) *perfmon_start = 0; @@ -610,7 +610,7 @@ int gr_gk20a_css_detach(struct channel_gk20a *ch, return -EINVAL; gr = &g->gr; - mutex_lock(&gr->cs_lock); + nvgpu_mutex_acquire(&gr->cs_lock); if (gr->cs_data) { struct gk20a_cs_snapshot *data = gr->cs_data; @@ -623,7 +623,7 @@ int gr_gk20a_css_detach(struct channel_gk20a *ch, } else { ret = -EBADF; } - mutex_unlock(&gr->cs_lock); + nvgpu_mutex_release(&gr->cs_lock); return ret; } @@ -639,9 +639,9 @@ int gr_gk20a_css_flush(struct channel_gk20a *ch, return -EINVAL; gr = &g->gr; - mutex_lock(&gr->cs_lock); + nvgpu_mutex_acquire(&gr->cs_lock); ret = css_gr_flush_snapshots(ch); - mutex_unlock(&gr->cs_lock); + nvgpu_mutex_release(&gr->cs_lock); return ret; } @@ -651,10 +651,10 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; - mutex_lock(&gr->cs_lock); + nvgpu_mutex_acquire(&gr->cs_lock); css_gr_free_shared_data(gr); - mutex_unlock(&gr->cs_lock); - mutex_destroy(&gr->cs_lock); + nvgpu_mutex_release(&gr->cs_lock); + 
nvgpu_mutex_destroy(&gr->cs_lock); } static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 5c9baf77..351be55e 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -349,7 +349,7 @@ static int nvgpu_gpu_ioctl_inval_icache( ops.offset = gr_pri_gpc0_gcc_dbg_r(); /* Take the global lock, since we'll be doing global regops */ - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); @@ -371,7 +371,7 @@ static int nvgpu_gpu_ioctl_inval_icache( gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl); end: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -384,9 +384,9 @@ static int nvgpu_gpu_ioctl_set_mmu_debug_mode( return -EINVAL; } - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); g->ops.mm.set_debug_mode(g, args->state == 1); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); gk20a_idle(g->dev); return 0; @@ -403,13 +403,13 @@ static int nvgpu_gpu_ioctl_set_debug_mode( if (!ch) return -EINVAL; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); if (g->ops.gr.set_sm_debug_mode) err = g->ops.gr.set_sm_debug_mode(g, ch, args->sms, !!args->enable); else err = -ENOSYS; - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -419,7 +419,7 @@ static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) int err = 0; u32 dbgr_control0; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); /* assert stop trigger. uniformity assumption: all SMs will have * the same state in dbg_control0. 
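The cycle-stats hunk above exercises the full lifecycle of the wrapped lock. A minimal sketch of that init/acquire/release/destroy sequence (illustrative only, not part of the diff), with a hypothetical cs_state struct standing in for gr_gk20a:

#include <nvgpu/lock.h>

/* Hypothetical stand-in for the gr_gk20a cycle-stats state. */
struct cs_state {
        struct nvgpu_mutex cs_lock;     /* was struct mutex */
        void *cs_data;
};

static void cs_state_setup(struct cs_state *cs)
{
        nvgpu_mutex_init(&cs->cs_lock);         /* replaces mutex_init() */
}

static void cs_state_teardown(struct cs_state *cs)
{
        nvgpu_mutex_acquire(&cs->cs_lock);      /* replaces mutex_lock() */
        cs->cs_data = NULL;                     /* drop shared data under the lock */
        nvgpu_mutex_release(&cs->cs_lock);      /* replaces mutex_unlock() */
        nvgpu_mutex_destroy(&cs->cs_lock);      /* replaces mutex_destroy() */
}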
*/ dbgr_control0 = @@ -430,7 +430,7 @@ static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) gk20a_writel(g, gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -456,7 +456,7 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() | gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(); - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); /* Lock down all SMs */ for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { @@ -482,7 +482,7 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, } end: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); kfree(w_state); return err; } @@ -491,7 +491,7 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) { int err = 0; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); /* Clear the pause mask to tell the GPU we want to resume everyone */ gk20a_writel(g, @@ -505,7 +505,7 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) * then a 1 to the run trigger */ gk20a_resume_all_sms(g); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -551,7 +551,7 @@ static int nvgpu_gpu_ioctl_has_any_exception( u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { @@ -565,7 +565,7 @@ static int nvgpu_gpu_ioctl_has_any_exception( tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id; } - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); args->tpc_exception_en_sm_mask = tpc_exception_en; return err; } diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 705eccaa..ffd15a37 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c @@ -47,7 +47,7 @@ struct gk20a_ctxsw_dev { atomic_t vma_ref; - struct mutex write_lock; + struct nvgpu_mutex write_lock; }; @@ -83,16 +83,16 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "filp=%p buf=%p size=%zu", filp, buf, size); - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); while (ring_is_empty(hdr)) { - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; err = wait_event_interruptible(dev->readout_wq, !ring_is_empty(hdr)); if (err) return err; - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); } while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { @@ -101,7 +101,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, if (copy_to_user(entry, &dev->ents[hdr->read_idx], sizeof(*entry))) { - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); return -EFAULT; } @@ -118,7 +118,7 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, hdr->read_idx); *off = hdr->read_idx; - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); return copied; } @@ -126,9 +126,9 @@ ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, static int 
gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) { gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); dev->write_enabled = true; - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); dev->g->ops.fecs_trace.enable(dev->g); return 0; } @@ -137,9 +137,9 @@ static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) { gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); dev->g->ops.fecs_trace.disable(dev->g); - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); dev->write_enabled = false; - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); return 0; } @@ -211,9 +211,9 @@ static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) return -EINVAL; - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); return ret; } @@ -223,9 +223,9 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, { struct gk20a *g = dev->g; - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); dev->filter = args->filter; - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); if (g->ops.fecs_trace.set_filter) g->ops.fecs_trace.set_filter(g, &dev->filter); @@ -235,9 +235,9 @@ static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, struct nvgpu_ctxsw_trace_filter_args *args) { - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); args->filter = dev->filter; - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); return 0; } @@ -293,7 +293,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) /* Allow only one user for this device */ dev = &trace->devs[vmid]; - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); if (dev->hdr) { err = -EBUSY; goto done; @@ -321,7 +321,7 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) } done: - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); idle: gk20a_idle(g->dev); @@ -338,9 +338,9 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) g->ops.fecs_trace.disable(g); - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); dev->write_enabled = false; - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); if (dev->hdr) { dev->g->ops.fecs_trace.free_user_buffer(dev->g); @@ -414,11 +414,11 @@ unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); poll_wait(filp, &dev->readout_wq, wait); if (!ring_is_empty(hdr)) mask |= POLLIN | POLLRDNORM; - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); return mask; } @@ -482,7 +482,7 @@ static int gk20a_ctxsw_init_devs(struct gk20a *g) dev->hdr = NULL; dev->write_enabled = false; init_waitqueue_head(&dev->readout_wq); - mutex_init(&dev->write_lock); + nvgpu_mutex_init(&dev->write_lock); atomic_set(&dev->vma_ref, 0); dev++; } @@ -567,7 +567,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "dev=%p hdr=%p", dev, hdr); - mutex_lock(&dev->write_lock); + nvgpu_mutex_acquire(&dev->write_lock); if 
(unlikely(!hdr)) { /* device has been released */ @@ -621,7 +621,7 @@ int gk20a_ctxsw_trace_write(struct gk20a *g, gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", hdr->read_idx, hdr->write_idx, ring_len(hdr)); - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); return ret; disable: @@ -638,7 +638,7 @@ filter: entry->tag, entry->timestamp, reason); done: - mutex_unlock(&dev->write_lock); + nvgpu_mutex_release(&dev->write_lock); return ret; } diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index ac11e378..f6290e1d 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -44,9 +44,9 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s) struct channel_gk20a *ch; struct gk20a *g = dbg_s->g; - mutex_lock(&dbg_s->ch_list_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); if (list_empty(&dbg_s->ch_list)) { - mutex_unlock(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); return NULL; } @@ -55,7 +55,7 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s) ch_entry); ch = g->fifo.channel + ch_data->chid; - mutex_unlock(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); return ch; } @@ -116,8 +116,8 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, init_waitqueue_head(&dbg_session->dbg_events.wait_queue); INIT_LIST_HEAD(&dbg_session->ch_list); - mutex_init(&dbg_session->ch_list_lock); - mutex_init(&dbg_session->ioctl_lock); + nvgpu_mutex_init(&dbg_session->ch_list_lock); + nvgpu_mutex_init(&dbg_session->ioctl_lock); dbg_session->dbg_events.events_enabled = false; dbg_session->dbg_events.num_pending_events = 0; @@ -127,61 +127,61 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, /* used in scenarios where the debugger session can take just the inter-session * lock for performance, but the profiler session must take the per-gpu lock * since it might not have an associated channel. 
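The ctxsw trace read path above preserves the original discipline of dropping the lock before blocking and re-taking it afterwards. A sketch of that pattern with the nvgpu wrapper (illustrative only), assuming a hypothetical ring_reader struct in place of gk20a_ctxsw_dev:

#include <linux/wait.h>
#include <nvgpu/lock.h>

/* Hypothetical reader state; mirrors gk20a_ctxsw_dev_read() above. */
struct ring_reader {
        struct nvgpu_mutex write_lock;
        wait_queue_head_t readout_wq;
        bool have_data;
};

static int ring_wait_for_data(struct ring_reader *r)
{
        int err;

        nvgpu_mutex_acquire(&r->write_lock);
        while (!r->have_data) {
                /* Never sleep with the lock held: release, wait, re-acquire. */
                nvgpu_mutex_release(&r->write_lock);
                err = wait_event_interruptible(r->readout_wq, r->have_data);
                if (err)
                        return err;
                nvgpu_mutex_acquire(&r->write_lock);
        }
        /* ... copy entries out while holding write_lock ... */
        nvgpu_mutex_release(&r->write_lock);
        return 0;
}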
*/ -static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s) +static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s) { struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); if (dbg_s->is_profiler || !ch) - mutex_lock(&dbg_s->g->dbg_sessions_lock); + nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock); else - mutex_lock(&ch->dbg_s_lock); + nvgpu_mutex_acquire(&ch->dbg_s_lock); } -static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s) +static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s) { struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); if (dbg_s->is_profiler || !ch) - mutex_unlock(&dbg_s->g->dbg_sessions_lock); + nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock); else - mutex_unlock(&ch->dbg_s_lock); + nvgpu_mutex_release(&ch->dbg_s_lock); } static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); - gk20a_dbg_session_mutex_lock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); dbg_s->dbg_events.events_enabled = true; dbg_s->dbg_events.num_pending_events = 0; - gk20a_dbg_session_mutex_unlock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); } static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); - gk20a_dbg_session_mutex_lock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); dbg_s->dbg_events.events_enabled = false; dbg_s->dbg_events.num_pending_events = 0; - gk20a_dbg_session_mutex_unlock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); } static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); - gk20a_dbg_session_mutex_lock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); if (dbg_s->dbg_events.events_enabled && dbg_s->dbg_events.num_pending_events > 0) dbg_s->dbg_events.num_pending_events--; - gk20a_dbg_session_mutex_unlock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); } static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, @@ -232,7 +232,7 @@ unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait); - gk20a_dbg_session_mutex_lock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); if (dbg_s->dbg_events.events_enabled && dbg_s->dbg_events.num_pending_events > 0) { @@ -243,7 +243,7 @@ unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) mask = (POLLPRI | POLLIN); } - gk20a_dbg_session_mutex_unlock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); return mask; } @@ -268,7 +268,7 @@ void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch) gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); /* guard against the session list being modified */ - mutex_lock(&ch->dbg_s_lock); + nvgpu_mutex_acquire(&ch->dbg_s_lock); list_for_each_entry(session_data, &ch->dbg_s_list, dbg_s_entry) { dbg_s = session_data->dbg_s; @@ -284,7 +284,7 @@ void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch) } } - mutex_unlock(&ch->dbg_s_lock); + nvgpu_mutex_release(&ch->dbg_s_lock); } bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch) @@ -296,7 +296,7 @@ bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch) gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); /* guard against the session list being modified */ - mutex_lock(&ch->dbg_s_lock); + nvgpu_mutex_acquire(&ch->dbg_s_lock); list_for_each_entry(session_data, 
&ch->dbg_s_list, dbg_s_entry) { dbg_s = session_data->dbg_s; @@ -308,7 +308,7 @@ bool gk20a_dbg_gpu_broadcast_stop_trigger(struct channel_gk20a *ch) } } - mutex_unlock(&ch->dbg_s_lock); + nvgpu_mutex_release(&ch->dbg_s_lock); return broadcast; } @@ -321,7 +321,7 @@ int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch) gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); /* guard against the session list being modified */ - mutex_lock(&ch->dbg_s_lock); + nvgpu_mutex_acquire(&ch->dbg_s_lock); list_for_each_entry(session_data, &ch->dbg_s_list, dbg_s_entry) { dbg_s = session_data->dbg_s; @@ -332,7 +332,7 @@ int gk20a_dbg_gpu_clear_broadcast_stop_trigger(struct channel_gk20a *ch) } } - mutex_unlock(&ch->dbg_s_lock); + nvgpu_mutex_release(&ch->dbg_s_lock); return 0; } @@ -407,12 +407,12 @@ static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s) struct dbg_session_channel_data *ch_data, *tmp; struct gk20a *g = dbg_s->g; - mutex_lock(&g->dbg_sessions_lock); - mutex_lock(&dbg_s->ch_list_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, ch_entry) dbg_unbind_single_channel_gk20a(dbg_s, ch_data); - mutex_unlock(&dbg_s->ch_list_lock); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return 0; } @@ -435,25 +435,25 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s, return -EINVAL; } - mutex_lock(&dbg_s->ch_list_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) { if (ch->hw_chid == ch_data->chid) { channel_found = true; break; } } - mutex_unlock(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); if (!channel_found) { gk20a_dbg_fn("channel not bounded, fd=%d\n", args->channel_fd); return -EINVAL; } - mutex_lock(&g->dbg_sessions_lock); - mutex_lock(&dbg_s->ch_list_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data); - mutex_unlock(&dbg_s->ch_list_lock); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -472,11 +472,11 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) * which called powergate/timeout disable ioctl, to be killed without * calling powergate/timeout enable ioctl */ - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE); nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); kfree(dbg_s); return 0; @@ -510,8 +510,8 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid); - mutex_lock(&g->dbg_sessions_lock); - mutex_lock(&ch->dbg_s_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&ch->dbg_s_lock); ch_data = kzalloc(sizeof(*ch_data), GFP_KERNEL); if (!ch_data) { @@ -535,12 +535,12 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, list_add(&session_data->dbg_s_entry, &ch->dbg_s_list); - mutex_lock(&dbg_s->ch_list_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); list_add_tail(&ch_data->ch_entry, &dbg_s->ch_list); - 
mutex_unlock(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); - mutex_unlock(&ch->dbg_s_lock); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&ch->dbg_s_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return 0; } @@ -591,9 +591,9 @@ static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, gk20a_dbg_fn("powergate mode = %d", args->enable); - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -604,9 +604,9 @@ static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, int status; struct gk20a *g = get_gk20a(dbg_s->dev); - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); status = g->timeouts_enabled; - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); if (status) args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE; @@ -620,11 +620,11 @@ static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( { gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); - gk20a_dbg_session_mutex_lock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); dbg_s->broadcast_stop_trigger = (args->broadcast != 0); - gk20a_dbg_session_mutex_unlock(dbg_s); + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); return 0; } @@ -651,12 +651,12 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( if (write_size > args->sm_error_state_record_size) write_size = args->sm_error_state_record_size; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = copy_to_user((void __user *)(uintptr_t) args->sm_error_state_record_mem, sm_error_state, write_size); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); if (err) { gk20a_err(dev_from_gk20a(g), "copy_to_user failed!\n"); return err; @@ -728,12 +728,12 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state( if (read_size > args->sm_error_state_record_size) read_size = args->sm_error_state_record_size; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = copy_from_user(sm_error_state, (void __user *)(uintptr_t) args->sm_error_state_record_mem, read_size); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); if (err) { err = -ENOMEM; goto err_free; @@ -901,7 +901,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, } /* protect from threaded user space calls */ - mutex_lock(&dbg_s->ioctl_lock); + nvgpu_mutex_acquire(&dbg_s->ioctl_lock); switch (cmd) { case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: @@ -1007,7 +1007,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, break; } - mutex_unlock(&dbg_s->ioctl_lock); + nvgpu_mutex_release(&dbg_s->ioctl_lock); gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err); @@ -1032,9 +1032,9 @@ static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, { int err; - mutex_lock(&gr->ctx_mutex); + nvgpu_mutex_acquire(&gr->ctx_mutex); err = !gr->ctx_vars.golden_image_initialized; - mutex_unlock(&gr->ctx_mutex); + nvgpu_mutex_release(&gr->ctx_mutex); if (err) return false; return true; @@ -1089,7 +1089,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, /* since exec_reg_ops sends methods to the ucode, it must take the * global gpu lock to protect against mixing methods from debug sessions * on other channels */ - mutex_lock(&g->dbg_sessions_lock); + 
nvgpu_mutex_acquire(&g->dbg_sessions_lock); if (!dbg_s->is_pg_disabled && !gk20a_gpu_is_virtual(dbg_s->dev)) { /* In the virtual case, the server will handle @@ -1150,7 +1150,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, } } - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); if (!err && powergate_err) err = powergate_err; @@ -1276,9 +1276,9 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, gk20a_dbg_fn("%s powergate mode = %d", dev_name(dbg_s->dev), args->mode); - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, args->mode); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -1299,7 +1299,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, } /* Take the global lock, since we'll be doing global regops */ - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); if (!ch_gk20a) { @@ -1319,7 +1319,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, err = g->ops.regops.apply_smpc_war(dbg_s); clean_up: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); gk20a_idle(g->dev); return err; } @@ -1341,7 +1341,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, } /* Take the global lock, since we'll be doing global regops */ - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); if (!ch_gk20a) { @@ -1361,7 +1361,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, * added here with gk20a being deprecated */ clean_up: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); gk20a_idle(g->dev); return err; } @@ -1386,7 +1386,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( return err; } - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); /* Suspend GPU context switching */ err = gr_gk20a_disable_ctxsw(g); @@ -1411,7 +1411,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n"); clean_up: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); gk20a_idle(g->dev); return err; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h index 773a669c..caa9395b 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h @@ -67,14 +67,14 @@ struct dbg_session_gk20a { /* list of bound channels, if any */ struct list_head ch_list; - struct mutex ch_list_lock; + struct nvgpu_mutex ch_list_lock; /* event support */ struct dbg_gpu_session_events dbg_events; bool broadcast_stop_trigger; - struct mutex ioctl_lock; + struct nvgpu_mutex ioctl_lock; }; struct dbg_session_data { diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index d20229b3..4b8e61c4 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c @@ -421,7 +421,7 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) #endif #ifdef CONFIG_DEBUG_FS - spin_lock_init(&g->debugfs_lock); + nvgpu_spinlock_init(&g->debugfs_lock); g->mm.ltc_enabled = true; g->mm.ltc_enabled_debug = true; diff --git 
a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 1f86fd8f..8244403e 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -60,8 +60,8 @@ struct gk20a_fecs_trace { struct mem_desc trace_buf; DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); - struct mutex hash_lock; - struct mutex poll_lock; + struct nvgpu_mutex hash_lock; + struct nvgpu_mutex poll_lock; struct task_struct *poll_task; }; @@ -133,14 +133,14 @@ void gk20a_fecs_trace_hash_dump(struct gk20a *g) gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table"); - mutex_lock(&trace->hash_lock); + nvgpu_mutex_acquire(&trace->hash_lock); hash_for_each(trace->pid_hash_table, bkt, ent, node) { gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d", ent, bkt, ent->context_ptr, ent->pid); } - mutex_unlock(&trace->hash_lock); + nvgpu_mutex_release(&trace->hash_lock); } static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid) @@ -161,9 +161,9 @@ static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid he->context_ptr = context_ptr; he->pid = pid; - mutex_lock(&trace->hash_lock); + nvgpu_mutex_acquire(&trace->hash_lock); hash_add(trace->pid_hash_table, &he->node, context_ptr); - mutex_unlock(&trace->hash_lock); + nvgpu_mutex_release(&trace->hash_lock); return 0; } @@ -176,7 +176,7 @@ static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr) gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "freeing hash entry context_ptr=%x", context_ptr); - mutex_lock(&trace->hash_lock); + nvgpu_mutex_acquire(&trace->hash_lock); hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node, context_ptr) { if (ent->context_ptr == context_ptr) { @@ -188,7 +188,7 @@ static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr) break; } } - mutex_unlock(&trace->hash_lock); + nvgpu_mutex_release(&trace->hash_lock); } static void gk20a_fecs_trace_free_hash_table(struct gk20a *g) @@ -200,12 +200,12 @@ static void gk20a_fecs_trace_free_hash_table(struct gk20a *g) gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace); - mutex_lock(&trace->hash_lock); + nvgpu_mutex_acquire(&trace->hash_lock); hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) { hash_del(&ent->node); kfree(ent); } - mutex_unlock(&trace->hash_lock); + nvgpu_mutex_release(&trace->hash_lock); } @@ -215,7 +215,7 @@ static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr) struct gk20a_fecs_trace *trace = g->fecs_trace; pid_t pid = 0; - mutex_lock(&trace->hash_lock); + nvgpu_mutex_acquire(&trace->hash_lock); hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) { if (ent->context_ptr == context_ptr) { gk20a_dbg(gpu_dbg_ctxsw, @@ -225,7 +225,7 @@ static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr) break; } } - mutex_unlock(&trace->hash_lock); + nvgpu_mutex_release(&trace->hash_lock); return pid; } @@ -336,7 +336,7 @@ static int gk20a_fecs_trace_poll(struct gk20a *g) if (unlikely(err)) return err; - mutex_lock(&trace->poll_lock); + nvgpu_mutex_acquire(&trace->poll_lock); write = gk20a_fecs_trace_get_write_index(g); if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) { gk20a_err(dev_from_gk20a(g), @@ -371,7 +371,7 @@ static int gk20a_fecs_trace_poll(struct gk20a *g) gk20a_fecs_trace_set_read_index(g, read); done: - mutex_unlock(&trace->poll_lock); + nvgpu_mutex_release(&trace->poll_lock); gk20a_idle(g->dev); return err; } @@ -580,8 +580,8 @@ static int 
gk20a_fecs_trace_init(struct gk20a *g) goto clean; } - mutex_init(&trace->poll_lock); - mutex_init(&trace->hash_lock); + nvgpu_mutex_init(&trace->poll_lock); + nvgpu_mutex_init(&trace->hash_lock); hash_init(trace->pid_hash_table); gk20a_fecs_trace_debugfs_init(g); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 4a32194c..c245f4a2 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -477,7 +477,7 @@ void gk20a_fifo_delete_runlist(struct fifo_gk20a *f) kfree(runlist->active_tsgs); runlist->active_tsgs = NULL; - mutex_destroy(&runlist->mutex); + nvgpu_mutex_destroy(&runlist->mutex); } memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) * @@ -650,7 +650,7 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) goto clean_up_runlist; } } - mutex_init(&runlist->mutex); + nvgpu_mutex_init(&runlist->mutex); /* None of buffers is pinned if this value doesn't change. Otherwise, one of them (cur_buffer) must have been pinned. */ @@ -809,8 +809,8 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) f->g = g; - mutex_init(&f->intr.isr.mutex); - mutex_init(&f->gr_reset_mutex); + nvgpu_mutex_init(&f->intr.isr.mutex); + nvgpu_mutex_init(&f->gr_reset_mutex); gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */ f->num_channels = g->ops.fifo.get_num_fifos(g); @@ -846,7 +846,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) init_runlist(g, f); INIT_LIST_HEAD(&f->free_chs); - mutex_init(&f->free_chs_mutex); + nvgpu_mutex_init(&f->free_chs_mutex); if (g->ops.mm.is_bar1_supported(g)) err = gk20a_gmmu_alloc_map_sys(&g->mm.bar1.vm, @@ -871,12 +871,12 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) gk20a_init_channel_support(g, chid); gk20a_init_tsg_support(g, chid); } - mutex_init(&f->tsg_inuse_mutex); + nvgpu_mutex_init(&f->tsg_inuse_mutex); f->remove_support = gk20a_remove_fifo_support; f->deferred_reset_pending = false; - mutex_init(&f->deferred_reset_mutex); + nvgpu_mutex_init(&f->deferred_reset_mutex); f->sw_ready = true; @@ -1224,7 +1224,7 @@ static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, if (!ch) return verbose; - mutex_lock(&ch->error_notifier_mutex); + nvgpu_mutex_acquire(&ch->error_notifier_mutex); if (ch->error_notifier_ref) { u32 err = ch->error_notifier->info32; if (ch->error_notifier->status == 0xffff) { @@ -1240,7 +1240,7 @@ static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); } } - mutex_unlock(&ch->error_notifier_mutex); + nvgpu_mutex_release(&ch->error_notifier_mutex); /* mark channel as faulted */ ch->has_timedout = true; @@ -1309,7 +1309,7 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch) { u32 engine_id, engines; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); gr_gk20a_disable_ctxsw(g); if (!g->fifo.deferred_reset_pending) @@ -1336,7 +1336,7 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch) clean_up: gr_gk20a_enable_ctxsw(g); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return 0; } @@ -1487,9 +1487,9 @@ static bool gk20a_fifo_handle_mmu_fault( } else if (engine_id != FIFO_INVAL_ENGINE_ID) { /* if lock is already taken, a reset is taking place so no need to repeat */ - if (mutex_trylock(&g->fifo.gr_reset_mutex)) { + if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) { gk20a_fifo_reset_engine(g, engine_id); - mutex_unlock(&g->fifo.gr_reset_mutex); + 
nvgpu_mutex_release(&g->fifo.gr_reset_mutex); } } @@ -1646,7 +1646,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) /* stop context switching to prevent engine assignments from changing until channel is recovered */ - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); gr_gk20a_disable_ctxsw(g); engines = gk20a_fifo_engines_on_id(g, hw_chid, false); @@ -1667,7 +1667,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) } gr_gk20a_enable_ctxsw(g); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); } void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) @@ -1676,7 +1676,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) /* stop context switching to prevent engine assignments from changing until TSG is recovered */ - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); gr_gk20a_disable_ctxsw(g); engines = gk20a_fifo_engines_on_id(g, tsgid, true); @@ -1693,7 +1693,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) } gr_gk20a_enable_ctxsw(g); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); } void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, @@ -2307,7 +2307,7 @@ void gk20a_fifo_isr(struct gk20a *g) if (g->fifo.sw_ready) { /* note we're not actually in an "isr", but rather * in a threaded interrupt context... */ - mutex_lock(&g->fifo.intr.isr.mutex); + nvgpu_mutex_acquire(&g->fifo.intr.isr.mutex); gk20a_dbg(gpu_dbg_intr, "fifo isr %08x\n", fifo_intr); @@ -2322,7 +2322,7 @@ void gk20a_fifo_isr(struct gk20a *g) if (unlikely(fifo_intr & error_intr_mask)) clear_intr = fifo_error_isr(g, fifo_intr); - mutex_unlock(&g->fifo.intr.isr.mutex); + nvgpu_mutex_release(&g->fifo.intr.isr.mutex); } gk20a_writel(g, fifo_intr_0_r(), clear_intr); @@ -2434,7 +2434,7 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid) /* we have no idea which runlist we are using. lock all */ for (i = 0; i < g->fifo.max_runlists; i++) - mutex_lock(&f->runlist_info[i].mutex); + nvgpu_mutex_acquire(&f->runlist_info[i].mutex); mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); @@ -2444,7 +2444,7 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid) pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); for (i = 0; i < g->fifo.max_runlists; i++) - mutex_unlock(&f->runlist_info[i].mutex); + nvgpu_mutex_release(&f->runlist_info[i].mutex); return ret; } @@ -2461,7 +2461,7 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid) /* we have no idea which runlist we are using. 
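The MMU-fault path above relies on the try-lock form. A sketch of how nvgpu_mutex_tryacquire() is used (illustrative only), assuming, as the converted code does, that it returns nonzero when the lock was taken, i.e. the same convention as mutex_trylock(); the helper below is hypothetical and modelled on the fifo.gr_reset_mutex usage:

#include <linux/types.h>
#include <nvgpu/lock.h>

static void maybe_reset_engine(struct nvgpu_mutex *reset_mutex, u32 engine_id,
                               void (*do_reset)(u32))
{
        /*
         * If the mutex is already held, a reset is already in flight on
         * another path, so skip rather than block in the fault handler.
         */
        if (nvgpu_mutex_tryacquire(reset_mutex)) {
                do_reset(engine_id);
                nvgpu_mutex_release(reset_mutex);
        }
}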
lock all */ for (i = 0; i < g->fifo.max_runlists; i++) - mutex_lock(&f->runlist_info[i].mutex); + nvgpu_mutex_acquire(&f->runlist_info[i].mutex); mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); @@ -2471,7 +2471,7 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid) pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); for (i = 0; i < g->fifo.max_runlists; i++) - mutex_unlock(&f->runlist_info[i].mutex); + nvgpu_mutex_release(&f->runlist_info[i].mutex); return ret; } @@ -3046,7 +3046,7 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid, runlist = &f->runlist_info[runlist_id]; - mutex_lock(&runlist->mutex); + nvgpu_mutex_acquire(&runlist->mutex); mutex_ret = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token); @@ -3056,7 +3056,7 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid, if (!mutex_ret) pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token); - mutex_unlock(&runlist->mutex); + nvgpu_mutex_release(&runlist->mutex); return ret; } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 20baf9de..1a248dba 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -41,7 +41,7 @@ struct fifo_runlist_info_gk20a { u32 total_entries; bool stopped; bool support_tsg; - struct mutex mutex; /* protect channel preempt and runlist upate */ + struct nvgpu_mutex mutex; /* protect channel preempt and runlist update */ }; enum { @@ -120,18 +120,18 @@ struct fifo_gk20a { struct channel_gk20a *channel; /* zero-kref'd channels here */ struct list_head free_chs; - struct mutex free_chs_mutex; - struct mutex gr_reset_mutex; + struct nvgpu_mutex free_chs_mutex; + struct nvgpu_mutex gr_reset_mutex; struct tsg_gk20a *tsg; - struct mutex tsg_inuse_mutex; + struct nvgpu_mutex tsg_inuse_mutex; void (*remove_support)(struct fifo_gk20a *); bool sw_ready; struct { /* share info between isrs and non-isr code */ struct { - struct mutex mutex; + struct nvgpu_mutex mutex; } isr; struct { u32 device_fatal_0; @@ -147,7 +147,7 @@ struct fifo_gk20a { unsigned long deferred_fault_engines; bool deferred_reset_pending; - struct mutex deferred_reset_mutex; + struct nvgpu_mutex deferred_reset_mutex; }; static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 79c3fd09..32570d3d 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include @@ -795,13 +795,13 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) gk20a_dbg_fn(""); - mutex_lock(&g->poweroff_lock); + nvgpu_mutex_acquire(&g->poweroff_lock); if (!g->power_on) goto done; if (gk20a_fifo_is_engine_busy(g)) { - mutex_unlock(&g->poweroff_lock); + nvgpu_mutex_release(&g->poweroff_lock); return -EBUSY; } gk20a_scale_suspend(dev); @@ -844,7 +844,7 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) gk20a_lockout_registers(g); done: - mutex_unlock(&g->poweroff_lock); + nvgpu_mutex_release(&g->poweroff_lock); return ret; } @@ -1373,9 +1373,9 @@ static int gk20a_pm_unrailgate(struct device *dev) trace_gk20a_pm_unrailgate(dev_name(dev)); if (platform->unrailgate) { - mutex_lock(&platform->railgate_lock); + nvgpu_mutex_acquire(&platform->railgate_lock); ret = platform->unrailgate(dev); - mutex_unlock(&platform->railgate_lock); + nvgpu_mutex_release(&platform->railgate_lock); } #ifdef CONFIG_DEBUG_FS 
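The next hunks convert the mc_enable read-modify-write to the spinlock wrapper. A minimal sketch of that short-critical-section pattern (illustrative only, not part of the diff), with hypothetical MMIO accessors standing in for gk20a_readl()/gk20a_writel():

#include <linux/types.h>
#include <nvgpu/lock.h>

/* Hypothetical MMIO accessors. */
u32 hw_readl(u32 reg);
void hw_writel(u32 reg, u32 val);

/* Protects the read-modify-write of a shared enable register. */
static struct nvgpu_spinlock enable_lock;

static void units_lock_setup(void)
{
        nvgpu_spinlock_init(&enable_lock);      /* replaces spin_lock_init() */
}

static void units_disable(u32 enable_reg, u32 units)
{
        u32 val;

        nvgpu_spinlock_acquire(&enable_lock);   /* replaces spin_lock() */
        val = hw_readl(enable_reg);
        val &= ~units;
        hw_writel(enable_reg, val);
        nvgpu_spinlock_release(&enable_lock);   /* replaces spin_unlock() */
}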
@@ -1896,11 +1896,11 @@ void gk20a_disable(struct gk20a *g, u32 units) gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units); - spin_lock(&g->mc_enable_lock); + nvgpu_spinlock_acquire(&g->mc_enable_lock); pmc = gk20a_readl(g, mc_enable_r()); pmc &= ~units; gk20a_writel(g, mc_enable_r(), pmc); - spin_unlock(&g->mc_enable_lock); + nvgpu_spinlock_release(&g->mc_enable_lock); } void gk20a_enable(struct gk20a *g, u32 units) @@ -1909,12 +1909,12 @@ void gk20a_enable(struct gk20a *g, u32 units) gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units); - spin_lock(&g->mc_enable_lock); + nvgpu_spinlock_acquire(&g->mc_enable_lock); pmc = gk20a_readl(g, mc_enable_r()); pmc |= units; gk20a_writel(g, mc_enable_r(), pmc); gk20a_readl(g, mc_enable_r()); - spin_unlock(&g->mc_enable_lock); + nvgpu_spinlock_release(&g->mc_enable_lock); udelay(20); } @@ -1953,7 +1953,7 @@ int __gk20a_do_idle(struct device *dev, bool force_reset) down_write(&g->busy_lock); /* acquire railgate lock to prevent unrailgate in midst of do_idle() */ - mutex_lock(&platform->railgate_lock); + nvgpu_mutex_acquire(&platform->railgate_lock); /* check if it is already railgated ? */ if (platform->is_railgated(dev)) @@ -1963,7 +1963,7 @@ int __gk20a_do_idle(struct device *dev, bool force_reset) * release railgate_lock, prevent suspend by incrementing usage counter, * re-acquire railgate_lock */ - mutex_unlock(&platform->railgate_lock); + nvgpu_mutex_release(&platform->railgate_lock); pm_runtime_get_sync(dev); /* @@ -1975,7 +1975,7 @@ int __gk20a_do_idle(struct device *dev, bool force_reset) target_ref_cnt = 2; else target_ref_cnt = 1; - mutex_lock(&platform->railgate_lock); + nvgpu_mutex_acquire(&platform->railgate_lock); nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, NVGPU_TIMER_CPU_TIMER); @@ -2052,7 +2052,7 @@ int __gk20a_do_idle(struct device *dev, bool force_reset) fail_drop_usage_count: pm_runtime_put_noidle(dev); fail_timeout: - mutex_unlock(&platform->railgate_lock); + nvgpu_mutex_release(&platform->railgate_lock); up_write(&g->busy_lock); return -EBUSY; } @@ -2101,7 +2101,7 @@ int __gk20a_do_unidle(struct device *dev) } /* release the lock and open up all other busy() calls */ - mutex_unlock(&platform->railgate_lock); + nvgpu_mutex_release(&platform->railgate_lock); up_write(&g->busy_lock); return 0; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 31b02378..acc3b975 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -29,7 +29,7 @@ struct gk20a_ctxsw_trace; struct acr_desc; #include -#include +#include #include #include #include @@ -871,9 +871,9 @@ struct gk20a { bool timeouts_enabled; #endif - struct mutex ch_wdt_lock; + struct nvgpu_mutex ch_wdt_lock; - struct mutex poweroff_lock; + struct nvgpu_mutex poweroff_lock; /* Channel priorities */ u32 timeslice_low_priority_us; @@ -900,7 +900,7 @@ struct gk20a { u32 emc3d_ratio; #ifdef CONFIG_DEBUG_FS - spinlock_t debugfs_lock; + struct nvgpu_spinlock debugfs_lock; struct dentry *debugfs_ltc_enabled; struct dentry *debugfs_timeouts_enabled; struct dentry *debugfs_gr_idle_timeout_default; @@ -924,11 +924,11 @@ struct gk20a { /* List of pending SW semaphore waits. 
*/ struct list_head pending_sema_waits; - raw_spinlock_t pending_sema_waits_lock; + struct nvgpu_raw_spinlock pending_sema_waits_lock; /* held while manipulating # of debug/profiler sessions present */ /* also prevents debug sessions from attaching until released */ - struct mutex dbg_sessions_lock; + struct nvgpu_mutex dbg_sessions_lock; int dbg_powergating_disabled_refcount; /*refcount for pg disable */ int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ @@ -942,7 +942,7 @@ struct gk20a { u64 pg_ungating_time_us; u32 pg_gating_cnt; - spinlock_t mc_enable_lock; + struct nvgpu_spinlock mc_enable_lock; struct nvgpu_gpu_characteristics gpu_characteristics; @@ -983,7 +983,7 @@ struct gk20a { struct device *node; } sched; - struct mutex client_lock; + struct nvgpu_mutex client_lock; int client_refcount; /* open channels and ctrl nodes */ dev_t cdev_region; @@ -1289,11 +1289,11 @@ static inline u32 get_field(u32 reg, u32 mask) /* invalidate channel lookup tlb */ static inline void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr) { - spin_lock(&gr->ch_tlb_lock); + nvgpu_spinlock_acquire(&gr->ch_tlb_lock); memset(gr->chid_tlb, 0, sizeof(struct gr_channel_map_tlb_entry) * GR_CHANNEL_MAP_TLB_SIZE); - spin_unlock(&gr->ch_tlb_lock); + nvgpu_spinlock_release(&gr->ch_tlb_lock); } /* classes that the device supports */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index d3b91a50..aad6c07b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -538,7 +538,7 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, struct gr_gk20a *gr = &g->gr; int ret; - mutex_lock(&gr->fecs_mutex); + nvgpu_mutex_acquire(&gr->fecs_mutex); if (op.mailbox.id != 0) gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id), @@ -561,7 +561,7 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, op.cond.fail, op.mailbox.fail, sleepduringwait); - mutex_unlock(&gr->fecs_mutex); + nvgpu_mutex_release(&gr->fecs_mutex); return ret; } @@ -573,7 +573,7 @@ int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, struct gr_gk20a *gr = &g->gr; int ret; - mutex_lock(&gr->fecs_mutex); + nvgpu_mutex_acquire(&gr->fecs_mutex); gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(op.mailbox.id), gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr)); @@ -587,7 +587,7 @@ int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, op.cond.fail, op.mailbox.fail, false); - mutex_unlock(&gr->fecs_mutex); + nvgpu_mutex_release(&gr->fecs_mutex); return ret; } @@ -1596,7 +1596,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, /* golden ctx is global to all channels. Although only the first channel initializes golden image, driver needs to prevent multiple channels from initializing golden ctx at the same time */ - mutex_lock(&gr->ctx_mutex); + nvgpu_mutex_acquire(&gr->ctx_mutex); if (gr->ctx_vars.golden_image_initialized) { goto clean_up; @@ -1825,7 +1825,7 @@ clean_up: gk20a_mem_end(g, gold_mem); gk20a_mem_end(g, gr_mem); - mutex_unlock(&gr->ctx_mutex); + nvgpu_mutex_release(&gr->ctx_mutex); return err; } @@ -3327,7 +3327,7 @@ out: int gk20a_comptag_allocator_init(struct gk20a_comptag_allocator *allocator, unsigned long size) { - mutex_init(&allocator->lock); + nvgpu_mutex_init(&allocator->lock); /* * 0th comptag is special and is never used. The base for this bitmap * is 1, and its size is one less than the size of comptag store. @@ -4064,7 +4064,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, /* no endian swap ? 
*/ - mutex_lock(&gr->zbc_lock); + nvgpu_mutex_acquire(&gr->zbc_lock); switch (zbc_val->type) { case GK20A_ZBC_TYPE_COLOR: /* search existing tables */ @@ -4159,7 +4159,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, } err_mutex: - mutex_unlock(&gr->zbc_lock); + nvgpu_mutex_release(&gr->zbc_lock); return ret; } @@ -4267,7 +4267,7 @@ int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr) struct zbc_entry zbc_val; u32 i, err; - mutex_init(&gr->zbc_lock); + nvgpu_mutex_init(&gr->zbc_lock); /* load default color table */ zbc_val.type = GK20A_ZBC_TYPE_COLOR; @@ -5136,7 +5136,7 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) gr->g = g; #if defined(CONFIG_GK20A_CYCLE_STATS) - mutex_init(&g->gr.cs_lock); + nvgpu_mutex_init(&g->gr.cs_lock); #endif err = gr_gk20a_init_gr_config(g, gr); @@ -5172,8 +5172,8 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) gr_gk20a_load_zbc_default_table(g, gr); - mutex_init(&gr->ctx_mutex); - spin_lock_init(&gr->ch_tlb_lock); + nvgpu_mutex_init(&gr->ctx_mutex); + nvgpu_spinlock_init(&gr->ch_tlb_lock); gr->remove_support = gk20a_remove_gr_support; gr->sw_ready = true; @@ -5244,7 +5244,7 @@ int gk20a_init_gr_support(struct gk20a *g) gk20a_dbg_fn(""); /* this is required before gr_gk20a_init_ctx_state */ - mutex_init(&g->gr.fecs_mutex); + nvgpu_mutex_init(&g->gr.fecs_mutex); err = gr_gk20a_init_ctxsw(g); if (err) @@ -5468,7 +5468,7 @@ int gk20a_gr_reset(struct gk20a *g) int err; u32 size; - mutex_lock(&g->gr.fecs_mutex); + nvgpu_mutex_acquire(&g->gr.fecs_mutex); err = gk20a_enable_gr_hw(g); if (err) @@ -5482,7 +5482,7 @@ int gk20a_gr_reset(struct gk20a *g) if (err) return err; - mutex_unlock(&g->gr.fecs_mutex); + nvgpu_mutex_release(&g->gr.fecs_mutex); /* this appears query for sw states but fecs actually init ramchain, etc so this is hw init */ @@ -5731,7 +5731,7 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g, if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0)) return 0; - mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex); + nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); virtual_address = ch->cyclestate.cyclestate_buffer; buffer_size = ch->cyclestate.cyclestate_buffer_size; @@ -5843,7 +5843,7 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g, sh_hdr->completed = true; offset += sh_hdr->size; } - mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex); + nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); #endif gk20a_dbg_fn(""); wake_up(&ch->notifier_wq); @@ -5874,7 +5874,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( if (!gr_fecs_current_ctx_valid_v(curr_ctx)) return NULL; - spin_lock(&gr->ch_tlb_lock); + nvgpu_spinlock_acquire(&gr->ch_tlb_lock); /* check cache first */ for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { @@ -5926,7 +5926,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( (GR_CHANNEL_MAP_TLB_SIZE - 1); unlock: - spin_unlock(&gr->ch_tlb_lock); + nvgpu_spinlock_release(&gr->ch_tlb_lock); if (curr_tsgid) *curr_tsgid = tsgid; return ret; @@ -5998,7 +5998,7 @@ static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) GPU_LIT_TPC_IN_GPC_STRIDE); u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, gr_gpc0_tpc0_sm_cfg_r() + offset)); @@ -6012,7 +6012,7 @@ static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) 
gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return 0; } @@ -6029,7 +6029,7 @@ static int gk20a_gr_update_sm_error_state(struct gk20a *g, GPU_LIT_TPC_IN_GPC_STRIDE); int err = 0; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); gr->sm_error_states[sm_id].hww_global_esr = sm_error_state->hww_global_esr; @@ -6081,7 +6081,7 @@ enable_ctxsw: err = gr_gk20a_enable_ctxsw(g); fail: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -6096,7 +6096,7 @@ static int gk20a_gr_clear_sm_error_state(struct gk20a *g, GPU_LIT_TPC_IN_GPC_STRIDE); int err = 0; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); @@ -6122,7 +6122,7 @@ static int gk20a_gr_clear_sm_error_state(struct gk20a *g, err = gr_gk20a_enable_ctxsw(g); fail: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -9128,7 +9128,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g, struct dbg_session_channel_data *ch_data; int err = 0; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = gr_gk20a_disable_ctxsw(g); if (err) { @@ -9136,7 +9136,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g, goto clean_up; } - mutex_lock(&dbg_s->ch_list_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) { ch = g->fifo.channel + ch_data->chid; @@ -9146,7 +9146,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g, local_ctx_resident_ch_fd = ch_data->channel_fd; } - mutex_unlock(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); err = gr_gk20a_enable_ctxsw(g); if (err) @@ -9155,7 +9155,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g, *ctx_resident_ch_fd = local_ctx_resident_ch_fd; clean_up: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -9170,7 +9170,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g, int err = 0; struct dbg_session_channel_data *ch_data; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = gr_gk20a_disable_ctxsw(g); if (err) { @@ -9193,7 +9193,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g, *ctx_resident_ch_fd = local_ctx_resident_ch_fd; clean_up: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 5a987a82..2dd1eaf5 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -275,8 +275,8 @@ struct gr_gk20a { bool valid; } ctx_vars; - struct mutex ctx_mutex; /* protect golden ctx init */ - struct mutex fecs_mutex; /* protect fecs method */ + struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */ + struct nvgpu_mutex fecs_mutex; /* protect fecs method */ #define GR_NETLIST_DYNAMIC -1 #define GR_NETLIST_STATIC_A 'A' @@ -333,7 +333,7 @@ struct gr_gk20a { u32 max_comptag_mem; /* max memory size (MB) for comptag */ struct compbit_store_desc compbit_store; struct gk20a_comptag_allocator { - struct mutex lock; + struct nvgpu_mutex lock; /* this bitmap starts at ctag 1. 
0th cannot be taken */ unsigned long *bitmap; /* size of bitmap, not max ctags, so one less */ @@ -342,7 +342,7 @@ struct gr_gk20a { struct gr_zcull_gk20a zcull; - struct mutex zbc_lock; + struct nvgpu_mutex zbc_lock; struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE]; struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE]; #ifdef CONFIG_TEGRA_19x_GPU @@ -363,7 +363,7 @@ struct gr_gk20a { #define GR_CHANNEL_MAP_TLB_SIZE 2 /* must of power of 2 */ struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE]; u32 channel_tlb_flush_index; - spinlock_t ch_tlb_lock; + struct nvgpu_spinlock ch_tlb_lock; void (*remove_support)(struct gr_gk20a *gr); bool sw_ready; @@ -379,7 +379,7 @@ struct gr_gk20a { struct sm_info *sm_to_cluster; struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states; #if defined(CONFIG_GK20A_CYCLE_STATS) - struct mutex cs_lock; + struct nvgpu_mutex cs_lock; struct gk20a_cs_snapshot *cs_data; #endif }; diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c index e1c23f79..13819872 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c @@ -3,7 +3,7 @@ * * GK20A Graphics * - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -138,7 +138,7 @@ static void gk20a_ltc_sync_debugfs(struct gk20a *g) { u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); - spin_lock(&g->debugfs_lock); + nvgpu_spinlock_acquire(&g->debugfs_lock); if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) { u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); if (g->mm.ltc_enabled_debug) @@ -151,6 +151,6 @@ static void gk20a_ltc_sync_debugfs(struct gk20a *g) gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); g->mm.ltc_enabled = g->mm.ltc_enabled_debug; } - spin_unlock(&g->debugfs_lock); + nvgpu_spinlock_release(&g->debugfs_lock); } #endif diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 103952ca..5db48ae7 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c @@ -1,7 +1,7 @@ /* * GK20A L2 * - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
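On the declaration side, as in gr_gk20a.h above, the conversion is purely a type swap in the embedding structure. A sketch of a converted header fragment (illustrative only), with a hypothetical subsystem struct mirroring the gr_gk20a members:

#include <nvgpu/lock.h>

/* Hypothetical per-subsystem state. */
struct subsys_state {
        struct nvgpu_mutex ctx_mutex;           /* was: struct mutex */
        struct nvgpu_mutex zbc_lock;            /* was: struct mutex */
        struct nvgpu_spinlock tlb_lock;         /* was: spinlock_t */
};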
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -120,7 +120,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, if (gr->compbit_store.mem.size == 0) return 0; - mutex_lock(&g->mm.l2_op_lock); + nvgpu_mutex_acquire(&g->mm.l2_op_lock); if (op == gk20a_cbc_op_clear) { gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), @@ -168,7 +168,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, } out: trace_gk20a_ltc_cbc_ctrl_done(dev_name(g->dev)); - mutex_unlock(&g->mm.l2_op_lock); + nvgpu_mutex_release(&g->mm.l2_op_lock); return err; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index c62d1f6c..2539138a 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -142,7 +142,7 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, WARN_ON(!bufbase); - spin_lock(&g->mm.pramin_window_lock); + nvgpu_spinlock_acquire(&g->mm.pramin_window_lock); if (g->mm.pramin_window != win) { gk20a_writel(g, bus_bar0_window_r(), win); @@ -158,7 +158,7 @@ static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem, { gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk); - spin_unlock(&g->mm.pramin_window_lock); + nvgpu_spinlock_release(&g->mm.pramin_window_lock); } /* @@ -483,7 +483,7 @@ static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm); static struct gk20a *gk20a_vidmem_buf_owner(struct dma_buf *dmabuf); struct gk20a_dmabuf_priv { - struct mutex lock; + struct nvgpu_mutex lock; struct gk20a_comptag_allocator *comptag_allocator; struct gk20a_comptags comptags; @@ -514,7 +514,7 @@ static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator, unsigned long addr; int err = 0; - mutex_lock(&allocator->lock); + nvgpu_mutex_acquire(&allocator->lock); addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size, 0, len, 0); if (addr < allocator->size) { @@ -524,7 +524,7 @@ static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator, } else { err = -ENOMEM; } - mutex_unlock(&allocator->lock); + nvgpu_mutex_release(&allocator->lock); return err; } @@ -538,9 +538,9 @@ static void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator, WARN_ON(addr > allocator->size); WARN_ON(addr + len > allocator->size); - mutex_lock(&allocator->lock); + nvgpu_mutex_acquire(&allocator->lock); bitmap_clear(allocator->bitmap, addr, len); - mutex_unlock(&allocator->lock); + nvgpu_mutex_release(&allocator->lock); } static void gk20a_mm_delete_priv(void *_priv) @@ -575,12 +575,12 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf) if (WARN_ON(!priv)) return ERR_PTR(-EINVAL); - mutex_lock(&priv->lock); + nvgpu_mutex_acquire(&priv->lock); if (priv->pin_count == 0) { priv->attach = dma_buf_attach(dmabuf, dev); if (IS_ERR(priv->attach)) { - mutex_unlock(&priv->lock); + nvgpu_mutex_release(&priv->lock); return (struct sg_table *)priv->attach; } @@ -588,13 +588,13 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf) DMA_BIDIRECTIONAL); if (IS_ERR(priv->sgt)) { dma_buf_detach(dmabuf, priv->attach); - mutex_unlock(&priv->lock); + nvgpu_mutex_release(&priv->lock); return priv->sgt; } } priv->pin_count++; - mutex_unlock(&priv->lock); + nvgpu_mutex_release(&priv->lock); return priv->sgt; } @@ -607,7 +607,7 @@ void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, if (IS_ERR(priv) || !priv) return; - 
mutex_lock(&priv->lock); + nvgpu_mutex_acquire(&priv->lock); WARN_ON(priv->sgt != sgt); priv->pin_count--; WARN_ON(priv->pin_count < 0); @@ -617,7 +617,7 @@ void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, DMA_BIDIRECTIONAL); dma_buf_detach(dmabuf, priv->attach); } - mutex_unlock(&priv->lock); + nvgpu_mutex_release(&priv->lock); } void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, @@ -842,7 +842,7 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g) static void gk20a_init_pramin(struct mm_gk20a *mm) { mm->pramin_window = 0; - spin_lock_init(&mm->pramin_window_lock); + nvgpu_spinlock_init(&mm->pramin_window_lock); mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; } @@ -971,12 +971,12 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) mm->vidmem.bootstrap_base = bootstrap_base; mm->vidmem.bootstrap_size = bootstrap_size; - mutex_init(&mm->vidmem.first_clear_mutex); + nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker); atomic64_set(&mm->vidmem.bytes_pending, 0); INIT_LIST_HEAD(&mm->vidmem.clear_list_head); - mutex_init(&mm->vidmem.clear_list_mutex); + nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); @@ -998,7 +998,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) } mm->g = g; - mutex_init(&mm->l2_op_lock); + nvgpu_mutex_init(&mm->l2_op_lock); /*TBD: make channel vm size configurable */ mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - @@ -1484,12 +1484,12 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm, return 0; } - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); buffer_list = nvgpu_kalloc(sizeof(*buffer_list) * vm->num_user_mapped_buffers, true); if (!buffer_list) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return -ENOMEM; } @@ -1510,7 +1510,7 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm, *num_buffers = vm->num_user_mapped_buffers; *mapped_buffers = buffer_list; - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return 0; } @@ -1544,9 +1544,9 @@ void gk20a_vm_mapping_batch_finish_locked( void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch) { - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); } void gk20a_vm_put_buffers(struct vm_gk20a *vm, @@ -1559,7 +1559,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, if (num_buffers == 0) return; - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); gk20a_vm_mapping_batch_start(&batch); vm->kref_put_batch = &batch; @@ -1569,7 +1569,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, vm->kref_put_batch = NULL; gk20a_vm_mapping_batch_finish_locked(vm, &batch); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); nvgpu_kfree(mapped_buffers); } @@ -1581,17 +1581,17 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, int retries = 10000; /* 50 ms */ struct mapped_buffer_node *mapped_buffer; - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); if (!mapped_buffer) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "invalid addr to unmap 0x%llx", 
offset); return; } if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); while (retries >= 0 || !tegra_platform_is_silicon()) { if (atomic_read(&mapped_buffer->ref.refcount) == 1) @@ -1602,11 +1602,11 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, if (retries < 0 && tegra_platform_is_silicon()) gk20a_err(d, "sync-unmap failed on 0x%llx", offset); - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); } if (mapped_buffer->user_mapped == 0) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "addr already unmapped from user 0x%llx", offset); return; } @@ -1619,7 +1619,7 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); vm->kref_put_batch = NULL; - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); } u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, @@ -2239,7 +2239,7 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes) buf->g = g; if (!g->mm.vidmem.cleared) { - mutex_lock(&g->mm.vidmem.first_clear_mutex); + nvgpu_mutex_acquire(&g->mm.vidmem.first_clear_mutex); if (!g->mm.vidmem.cleared) { err = gk20a_vidmem_clear_all(g); if (err) { @@ -2248,7 +2248,7 @@ int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes) goto err_kfree; } } - mutex_unlock(&g->mm.vidmem.first_clear_mutex); + nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex); } buf->mem = kzalloc(sizeof(struct mem_desc), GFP_KERNEL); @@ -2301,10 +2301,10 @@ int gk20a_vidmem_get_space(struct gk20a *g, u64 *space) if (!nvgpu_alloc_initialized(allocator)) return -ENOSYS; - mutex_lock(&g->mm.vidmem.clear_list_mutex); + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); *space = nvgpu_alloc_space(allocator) + atomic64_read(&g->mm.vidmem.bytes_pending); - mutex_unlock(&g->mm.vidmem.clear_list_mutex); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); return 0; #else return -ENOSYS; @@ -2425,7 +2425,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, return -EFAULT; } - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); /* check if this buffer is already mapped */ if (!vm->userspace_managed) { @@ -2434,7 +2434,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, flags, kind, sgt, user_mapped, rw_flag); if (map_offset) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return map_offset; } } @@ -2627,7 +2627,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, mapped_buffer->va_node = va_node; } - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return map_offset; @@ -2643,7 +2643,7 @@ clean_up: if (!IS_ERR(bfr.sgt)) gk20a_mm_unpin(d, dmabuf, bfr.sgt); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_dbg_info("err=%d\n", err); return 0; } @@ -2658,13 +2658,13 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, struct mapped_buffer_node *mapped_buffer; struct device *d = dev_from_vm(vm); - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva); if (!mapped_buffer || !mapped_buffer->user_mapped) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva); return -EFAULT; } @@ -2685,7 +2685,7 @@ int gk20a_vm_get_compbits_info(struct 
vm_gk20a *vm, *mapping_ctagline = mapped_buffer->ctag_offset; } - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return 0; } @@ -2716,19 +2716,19 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm, return -EFAULT; } - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva); if (!mapped_buffer || !mapped_buffer->user_mapped) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva); return -EFAULT; } if (!mapped_buffer->ctags_mappable) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva); return -EFAULT; @@ -2747,7 +2747,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm, u64 cacheline_offset_start; if (!mapped_buffer->ctag_map_win_size) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "%s: mapping 0x%llx does not have " "mappable comptags", @@ -2774,7 +2774,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm, mapped_buffer->ctag_map_win_size, &va_node); if (err) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return err; } @@ -2783,7 +2783,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm, * pointer if the space is freed * before before the buffer is * unmapped */ - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "%s: comptags cannot be mapped into allocated space", __func__); @@ -2810,7 +2810,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm, g->gr.compbit_store.mem.aperture); if (!mapped_buffer->ctag_map_win_addr) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "%s: failed to map comptags for mapping 0x%llx", __func__, mapping_gva); @@ -2818,7 +2818,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm, } } else if (fixed_mapping && *compbits_win_gva && mapped_buffer->ctag_map_win_addr != *compbits_win_gva) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "%s: re-requesting comptags map into mismatching address. 
buffer offset 0x" "%llx, existing comptag map at 0x%llx, requested remap 0x%llx", @@ -2830,7 +2830,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm, *mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0); *compbits_win_gva = mapped_buffer->ctag_map_win_addr; - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return 0; } @@ -2852,7 +2852,7 @@ static u64 __gk20a_gmmu_map(struct vm_gk20a *vm, struct gk20a *g = gk20a_from_vm(vm); u64 vaddr; - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); vaddr = g->ops.mm.gmmu_map(vm, addr, *sgt, /* sg table */ 0, /* sg offset */ @@ -2866,7 +2866,7 @@ static u64 __gk20a_gmmu_map(struct vm_gk20a *vm, priv, /* priv */ NULL, /* mapping_batch handle */ aperture); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); if (!vaddr) { gk20a_err(dev_from_vm(vm), "failed to allocate va space"); return 0; @@ -3128,10 +3128,10 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, * are not done anyway */ WARN_ON(attr != 0 && attr != DMA_ATTR_NO_KERNEL_MAPPING); - mutex_lock(&g->mm.vidmem.clear_list_mutex); + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); before_pending = atomic64_read(&g->mm.vidmem.bytes_pending); addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size); - mutex_unlock(&g->mm.vidmem.clear_list_mutex); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); if (!addr) { /* * If memory is known to be freed soon, let the user know that @@ -3188,12 +3188,12 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr, bool was_empty; if (mem->user_mem) { - mutex_lock(&g->mm.vidmem.clear_list_mutex); + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); was_empty = list_empty(&g->mm.vidmem.clear_list_head); list_add_tail(&mem->clear_list_entry, &g->mm.vidmem.clear_list_head); atomic64_add(mem->size, &g->mm.vidmem.bytes_pending); - mutex_unlock(&g->mm.vidmem.clear_list_mutex); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); if (was_empty) { cancel_work_sync(&g->mm.vidmem.clear_mem_worker); @@ -3258,12 +3258,12 @@ static struct mem_desc *get_pending_mem_desc(struct mm_gk20a *mm) { struct mem_desc *mem = NULL; - mutex_lock(&mm->vidmem.clear_list_mutex); + nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); mem = list_first_entry_or_null(&mm->vidmem.clear_list_head, struct mem_desc, clear_list_entry); if (mem) list_del_init(&mem->clear_list_entry); - mutex_unlock(&mm->vidmem.clear_list_mutex); + nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); return mem; } @@ -3409,12 +3409,12 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) dma_addr_t addr = 0; struct gk20a *g = gk20a_from_vm(vm); - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr); if (buffer) addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl, buffer->flags); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return addr; } @@ -3426,7 +3426,7 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm, { struct gk20a *g = gk20a_from_vm(vm); - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); g->ops.mm.gmmu_unmap(vm, vaddr, size, @@ -3435,7 +3435,7 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm, rw_flag, false, NULL); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); } phys_addr_t gk20a_get_phys_from_iova(struct device *d, @@ -4053,16 +4053,16 @@ void 
gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset) struct device *d = dev_from_vm(vm); struct mapped_buffer_node *mapped_buffer; - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset); if (!mapped_buffer) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); gk20a_err(d, "invalid addr to unmap 0x%llx", offset); return; } kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); } static void gk20a_vm_free_entries(struct vm_gk20a *vm, @@ -4101,7 +4101,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) } } - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); /* TBD: add a flag here for the unmap code to recognize teardown * and short-circuit any otherwise expensive operations. */ @@ -4123,7 +4123,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) gk20a_deinit_vm(vm); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); } void gk20a_vm_remove_support(struct vm_gk20a *vm) @@ -4547,7 +4547,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, vm->mapped_buffers = RB_ROOT; - mutex_init(&vm->update_gmmu_lock); + nvgpu_mutex_init(&vm->update_gmmu_lock); kref_init(&vm->ref); INIT_LIST_HEAD(&vm->reserved_va_list); @@ -4696,7 +4696,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, INIT_LIST_HEAD(&va_node->va_buffers_list); INIT_LIST_HEAD(&va_node->reserved_va_list); - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); /* mark that we need to use sparse mappings here */ if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) { @@ -4715,7 +4715,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, NULL, APERTURE_INVALID); if (!map_offset) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); nvgpu_free(vma, vaddr_start); kfree(va_node); goto clean_up; @@ -4725,7 +4725,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, } list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); args->o_a.offset = vaddr_start; err = 0; @@ -4754,7 +4754,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, vma = vm->vma[pgsz_idx]; nvgpu_free(vma, args->offset); - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); va_node = addr_to_reservation(vm, args->offset); if (va_node) { struct mapped_buffer_node *buffer, *n; @@ -4782,7 +4782,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, NULL); kfree(va_node); } - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); err = 0; return err; @@ -4819,7 +4819,7 @@ int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) if (likely(priv)) return 0; - mutex_lock(&priv_lock); + nvgpu_mutex_acquire(&priv_lock); priv = dma_buf_get_drvdata(dmabuf, dev); if (priv) goto priv_exist_or_err; @@ -4828,12 +4828,12 @@ int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) priv = ERR_PTR(-ENOMEM); goto priv_exist_or_err; } - mutex_init(&priv->lock); + nvgpu_mutex_init(&priv->lock); INIT_LIST_HEAD(&priv->states); priv->buffer_id = ++priv_count; dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); priv_exist_or_err: - mutex_unlock(&priv_lock); + nvgpu_mutex_release(&priv_lock); if (IS_ERR(priv)) 
return -ENOMEM; @@ -4858,7 +4858,7 @@ int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev, if (WARN_ON(!priv)) return -ENOSYS; - mutex_lock(&priv->lock); + nvgpu_mutex_acquire(&priv->lock); list_for_each_entry(s, &priv->states, list) if (s->offset == offset) @@ -4873,11 +4873,11 @@ int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev, s->offset = offset; INIT_LIST_HEAD(&s->list); - mutex_init(&s->lock); + nvgpu_mutex_init(&s->lock); list_add_tail(&s->list, &priv->states); out: - mutex_unlock(&priv->lock); + nvgpu_mutex_release(&priv->lock); if (!err) *state = s; return err; @@ -5152,7 +5152,7 @@ int gk20a_mm_fb_flush(struct gk20a *g) nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER); - mutex_lock(&mm->l2_op_lock); + nvgpu_mutex_acquire(&mm->l2_op_lock); /* Make sure all previous writes are committed to the L2. There's no guarantee that writes are to DRAM. This will be a sysmembar internal @@ -5184,7 +5184,7 @@ int gk20a_mm_fb_flush(struct gk20a *g) trace_gk20a_mm_fb_flush_done(dev_name(g->dev)); - mutex_unlock(&mm->l2_op_lock); + nvgpu_mutex_release(&mm->l2_op_lock); pm_runtime_put_noidle(g->dev); @@ -5231,9 +5231,9 @@ void gk20a_mm_l2_invalidate(struct gk20a *g) struct mm_gk20a *mm = &g->mm; gk20a_busy_noresume(g->dev); if (g->power_on) { - mutex_lock(&mm->l2_op_lock); + nvgpu_mutex_acquire(&mm->l2_op_lock); gk20a_mm_l2_invalidate_locked(g); - mutex_unlock(&mm->l2_op_lock); + nvgpu_mutex_release(&mm->l2_op_lock); } pm_runtime_put_noidle(g->dev); } @@ -5252,7 +5252,7 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) nvgpu_timeout_init(g, &timeout, 2000, NVGPU_TIMER_RETRY_TIMER); - mutex_lock(&mm->l2_op_lock); + nvgpu_mutex_acquire(&mm->l2_op_lock); trace_gk20a_mm_l2_flush(dev_name(g->dev)); @@ -5280,7 +5280,7 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) if (invalidate) gk20a_mm_l2_invalidate_locked(g); - mutex_unlock(&mm->l2_op_lock); + nvgpu_mutex_release(&mm->l2_op_lock); hw_was_off: pm_runtime_put_noidle(g->dev); @@ -5300,7 +5300,7 @@ void gk20a_mm_cbc_clean(struct gk20a *g) nvgpu_timeout_init(g, &timeout, 200, NVGPU_TIMER_RETRY_TIMER); - mutex_lock(&mm->l2_op_lock); + nvgpu_mutex_acquire(&mm->l2_op_lock); /* Flush all dirty lines from the CBC to L2 */ gk20a_writel(g, flush_l2_clean_comptags_r(), @@ -5320,7 +5320,7 @@ void gk20a_mm_cbc_clean(struct gk20a *g) } while (!nvgpu_timeout_expired_msg(&timeout, "l2_clean_comptags too many retries")); - mutex_unlock(&mm->l2_op_lock); + nvgpu_mutex_release(&mm->l2_op_lock); hw_was_off: pm_runtime_put_noidle(g->dev); @@ -5334,19 +5334,19 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, gk20a_dbg_fn("gpu_va=0x%llx", gpu_va); - mutex_lock(&vm->update_gmmu_lock); + nvgpu_mutex_acquire(&vm->update_gmmu_lock); mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers, gpu_va); if (!mapped_buffer) { - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return -EINVAL; } *dmabuf = mapped_buffer->dmabuf; *offset = gpu_va - mapped_buffer->addr; - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); return 0; } @@ -5373,7 +5373,7 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0) >> 12); - mutex_lock(&tlb_lock); + nvgpu_mutex_acquire(&tlb_lock); trace_gk20a_mm_tlb_invalidate(dev_name(g->dev)); @@ -5414,7 +5414,7 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) trace_gk20a_mm_tlb_invalidate_done(dev_name(g->dev)); out: - 
mutex_unlock(&tlb_lock); + nvgpu_mutex_release(&tlb_lock); } int gk20a_mm_suspend(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 294dc628..5f29c9e7 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -131,7 +131,7 @@ struct gk20a_buffer_state { struct list_head list; /* The valid compbits and the fence must be changed atomically. */ - struct mutex lock; + struct nvgpu_mutex lock; /* Offset of the surface within the dma-buf whose state is * described by this struct (one dma-buf can contain multiple @@ -272,7 +272,7 @@ struct vm_gk20a { struct kref ref; - struct mutex update_gmmu_lock; + struct nvgpu_mutex update_gmmu_lock; struct gk20a_mm_entry pdb; @@ -360,7 +360,7 @@ struct mm_gk20a { struct vm_gk20a vm; } ce; - struct mutex l2_op_lock; + struct nvgpu_mutex l2_op_lock; #ifdef CONFIG_ARCH_TEGRA_18x_SOC struct mem_desc bar2_desc; #endif @@ -395,7 +395,7 @@ struct mm_gk20a { struct mem_desc sysmem_flush; u32 pramin_window; - spinlock_t pramin_window_lock; + struct nvgpu_spinlock pramin_window_lock; #if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) u32 force_pramin; /* via debugfs */ #else @@ -413,10 +413,10 @@ struct mm_gk20a { u32 ce_ctx_id; volatile bool cleared; - struct mutex first_clear_mutex; + struct nvgpu_mutex first_clear_mutex; struct list_head clear_list_head; - struct mutex clear_list_mutex; + struct nvgpu_mutex clear_list_mutex; struct work_struct clear_mem_worker; atomic64_t bytes_pending; diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index fa0909ee..c841c8e6 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h @@ -20,6 +20,8 @@ #include #include +#include + #define GK20A_CLKS_MAX 4 struct gk20a; @@ -184,7 +186,7 @@ struct gk20a_platform { /* Called to turn on the device */ int (*unrailgate)(struct device *dev); - struct mutex railgate_lock; + struct nvgpu_mutex railgate_lock; /* Called to check state of device */ bool (*is_railgated)(struct device *dev); diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index 5ba9d25b..225b98e4 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c @@ -357,7 +357,7 @@ static void gm20b_tegra_postscale(struct device *dev, unsigned long freq) emc_rate = tegra_bwmgr_get_max_emc_rate(); emc_params->freq_last_set = emc_rate; - mutex_lock(&platform->railgate_lock); + nvgpu_mutex_acquire(&platform->railgate_lock); if (platform->is_railgated && !platform->is_railgated(dev)) goto done; @@ -365,7 +365,7 @@ static void gm20b_tegra_postscale(struct device *dev, unsigned long freq) TEGRA_BWMGR_SET_EMC_FLOOR); done: - mutex_unlock(&platform->railgate_lock); + nvgpu_mutex_release(&platform->railgate_lock); } #endif diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 6227d523..4ea9b911 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -1414,11 +1414,11 @@ int gk20a_init_pmu(struct pmu_gk20a *pmu) struct gk20a *g = gk20a_from_pmu(pmu); struct pmu_v *pv = &g->ops.pmu_ver; - mutex_init(&pmu->elpg_mutex); - mutex_init(&pmu->pg_mutex); - mutex_init(&pmu->isr_mutex); - mutex_init(&pmu->pmu_copy_lock); - mutex_init(&pmu->pmu_seq_lock); + nvgpu_mutex_init(&pmu->elpg_mutex); + nvgpu_mutex_init(&pmu->pg_mutex); + nvgpu_mutex_init(&pmu->isr_mutex); + 
nvgpu_mutex_init(&pmu->pmu_copy_lock); + nvgpu_mutex_init(&pmu->pmu_seq_lock); pmu->remove_support = gk20a_remove_pmu_support; @@ -2189,7 +2189,7 @@ void pmu_copy_from_dmem(struct pmu_gk20a *pmu, return; } - mutex_lock(&pmu->pmu_copy_lock); + nvgpu_mutex_acquire(&pmu->pmu_copy_lock); words = size >> 2; bytes = size & 0x3; @@ -2211,7 +2211,7 @@ void pmu_copy_from_dmem(struct pmu_gk20a *pmu, dst[(words << 2) + i] = ((u8 *)&data)[i]; } } - mutex_unlock(&pmu->pmu_copy_lock); + nvgpu_mutex_release(&pmu->pmu_copy_lock); return; } @@ -2235,7 +2235,7 @@ void pmu_copy_to_dmem(struct pmu_gk20a *pmu, return; } - mutex_lock(&pmu->pmu_copy_lock); + nvgpu_mutex_acquire(&pmu->pmu_copy_lock); words = size >> 2; bytes = size & 0x3; @@ -2265,7 +2265,7 @@ void pmu_copy_to_dmem(struct pmu_gk20a *pmu, "copy failed. bytes written %d, expected %d", data - dst, size); } - mutex_unlock(&pmu->pmu_copy_lock); + nvgpu_mutex_release(&pmu->pmu_copy_lock); return; } @@ -2571,17 +2571,17 @@ static int pmu_seq_acquire(struct pmu_gk20a *pmu, struct pmu_sequence *seq; u32 index; - mutex_lock(&pmu->pmu_seq_lock); + nvgpu_mutex_acquire(&pmu->pmu_seq_lock); index = find_first_zero_bit(pmu->pmu_seq_tbl, sizeof(pmu->pmu_seq_tbl)); if (index >= sizeof(pmu->pmu_seq_tbl)) { gk20a_err(dev_from_gk20a(g), "no free sequence available"); - mutex_unlock(&pmu->pmu_seq_lock); + nvgpu_mutex_release(&pmu->pmu_seq_lock); return -EAGAIN; } set_bit(index, pmu->pmu_seq_tbl); - mutex_unlock(&pmu->pmu_seq_lock); + nvgpu_mutex_release(&pmu->pmu_seq_lock); seq = &pmu->seq[index]; seq->state = PMU_SEQ_STATE_PENDING; @@ -2616,7 +2616,7 @@ static int pmu_queue_init(struct pmu_gk20a *pmu, queue->id = id; g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init); queue->mutex_id = id; - mutex_init(&queue->mutex); + nvgpu_mutex_init(&queue->mutex); gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x", id, queue->index, queue->offset, queue->size); @@ -2831,7 +2831,7 @@ static int pmu_queue_lock(struct pmu_gk20a *pmu, return 0; if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { - mutex_lock(&queue->mutex); + nvgpu_mutex_acquire(&queue->mutex); return 0; } @@ -2848,7 +2848,7 @@ static int pmu_queue_unlock(struct pmu_gk20a *pmu, return 0; if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { - mutex_unlock(&queue->mutex); + nvgpu_mutex_release(&queue->mutex); return 0; } @@ -3245,10 +3245,10 @@ static int gk20a_init_pmu_setup_hw1(struct gk20a *g) gk20a_dbg_fn(""); - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); pmu_reset(pmu); pmu->isr_enabled = true; - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); /* setup apertures - virtual */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), @@ -4530,9 +4530,9 @@ void gk20a_pmu_isr(struct gk20a *g) gk20a_dbg_fn(""); - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); if (!pmu->isr_enabled) { - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); return; } @@ -4546,7 +4546,7 @@ void gk20a_pmu_isr(struct gk20a *g) intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask; if (!intr || pmu->pmu_state == PMU_STATE_OFF) { gk20a_writel(g, pwr_falcon_irqsclr_r(), intr); - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); return; } @@ -4583,7 +4583,7 @@ void gk20a_pmu_isr(struct gk20a *g) pwr_falcon_irqsset_swgen0_set_f()); } - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); } static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd, @@ -4987,7 +4987,7 @@ int 
gk20a_pmu_enable_elpg(struct gk20a *g) if (!support_gk20a_pmu(g->dev)) return ret; - mutex_lock(&pmu->elpg_mutex); + nvgpu_mutex_acquire(&pmu->elpg_mutex); pmu->elpg_refcnt++; if (pmu->elpg_refcnt <= 0) @@ -5026,7 +5026,7 @@ int gk20a_pmu_enable_elpg(struct gk20a *g) } exit_unlock: - mutex_unlock(&pmu->elpg_mutex); + nvgpu_mutex_release(&pmu->elpg_mutex); gk20a_dbg_fn("done"); return ret; } @@ -5049,7 +5049,7 @@ int gk20a_pmu_disable_elpg(struct gk20a *g) if (!support_gk20a_pmu(g->dev)) return ret; - mutex_lock(&pmu->elpg_mutex); + nvgpu_mutex_acquire(&pmu->elpg_mutex); pmu->elpg_refcnt--; if (pmu->elpg_refcnt > 0) { @@ -5138,7 +5138,7 @@ int gk20a_pmu_disable_elpg(struct gk20a *g) exit_reschedule: exit_unlock: - mutex_unlock(&pmu->elpg_mutex); + nvgpu_mutex_release(&pmu->elpg_mutex); gk20a_dbg_fn("done"); return ret; } @@ -5182,9 +5182,9 @@ int gk20a_pmu_destroy(struct gk20a *g) g->pg_ungating_time_us += (u64)pg_stat_data.ungating_time; g->pg_gating_cnt += pg_stat_data.gating_cnt; - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); pmu->isr_enabled = false; - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); pmu->pmu_state = PMU_STATE_OFF; pmu->pmu_ready = false; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index e4513457..c1583eab 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -136,7 +136,7 @@ struct pmu_queue { u32 mutex_id; u32 mutex_lock; /* used by sw, for LPQ/HPQ queue */ - struct mutex mutex; + struct nvgpu_mutex mutex; /* current write position */ u32 position; @@ -334,8 +334,8 @@ struct pmu_gk20a { struct pmu_mutex *mutex; u32 mutex_cnt; - struct mutex pmu_copy_lock; - struct mutex pmu_seq_lock; + struct nvgpu_mutex pmu_copy_lock; + struct nvgpu_mutex pmu_seq_lock; struct nvgpu_allocator dmem; @@ -355,8 +355,8 @@ struct pmu_gk20a { #define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */ struct work_struct pg_init; - struct mutex pg_mutex; /* protect pg-RPPG/MSCG enable/disable */ - struct mutex elpg_mutex; /* protect elpg enable/disable */ + struct nvgpu_mutex pg_mutex; /* protect pg-RPPG/MSCG enable/disable */ + struct nvgpu_mutex elpg_mutex; /* protect elpg enable/disable */ int elpg_refcnt; /* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */ union { @@ -375,7 +375,7 @@ struct pmu_gk20a { u32 load_shadow; u32 load_avg; - struct mutex isr_mutex; + struct nvgpu_mutex isr_mutex; bool isr_enabled; bool zbc_ready; diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c index 20cd1232..6fdc2774 100644 --- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c @@ -46,29 +46,29 @@ ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf, return -EINVAL; size = sizeof(event); - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); while (!sched->status) { - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; err = wait_event_interruptible(sched->readout_wq, sched->status); if (err) return err; - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); } event.reserved = 0; event.status = sched->status; if (copy_to_user(buf, &event, size)) { - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); return -EFAULT; } sched->status = 0; - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); return size; } @@ -80,11 
+80,11 @@ unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait) gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, ""); - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); poll_wait(filp, &sched->readout_wq, wait); if (sched->status) mask |= POLLIN | POLLRDNORM; - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); return mask; } @@ -100,13 +100,13 @@ static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched, return -ENOSPC; } - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); if (copy_to_user((void __user *)(uintptr_t)arg->buffer, sched->active_tsg_bitmap, sched->bitmap_size)) { - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); return -EFAULT; } - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); return 0; } @@ -122,15 +122,15 @@ static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched, return -ENOSPC; } - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); if (copy_to_user((void __user *)(uintptr_t)arg->buffer, sched->recent_tsg_bitmap, sched->bitmap_size)) { - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); return -EFAULT; } memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); return 0; } @@ -158,7 +158,7 @@ static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched, if (!bitmap) return -ENOMEM; - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); for (tsgid = 0; tsgid < f->num_channels; tsgid++) { if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { tsg = &f->tsg[tsgid]; @@ -166,7 +166,7 @@ static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched, NVGPU_SCHED_SET(tsgid, bitmap); } } - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); if (copy_to_user((void __user *)(uintptr_t)arg->buffer, bitmap, sched->bitmap_size)) @@ -283,9 +283,9 @@ static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, ""); - mutex_lock(&sched->control_lock); + nvgpu_mutex_acquire(&sched->control_lock); sched->control_locked = true; - mutex_unlock(&sched->control_lock); + nvgpu_mutex_release(&sched->control_lock); return 0; } @@ -293,9 +293,9 @@ static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, ""); - mutex_lock(&sched->control_lock); + nvgpu_mutex_acquire(&sched->control_lock); sched->control_locked = false; - mutex_unlock(&sched->control_lock); + nvgpu_mutex_release(&sched->control_lock); return 0; } @@ -325,12 +325,12 @@ static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched, if (!kref_get_unless_zero(&tsg->refcount)) return -ENXIO; - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { gk20a_warn(dev_from_gk20a(g), "tsgid=%d already referenced", tsgid); /* unlock status_lock as gk20a_tsg_release locks it */ - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); kref_put(&tsg->refcount, gk20a_tsg_release); return -ENXIO; } @@ -339,7 +339,7 @@ static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched, * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close */ NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); - 
mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); return 0; } @@ -357,15 +357,15 @@ static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched, if (tsgid >= f->num_channels) return -EINVAL; - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); gk20a_warn(dev_from_gk20a(g), "tsgid=%d not previously referenced", tsgid); return -ENXIO; } NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); tsg = &f->tsg[tsgid]; kref_put(&tsg->refcount, gk20a_tsg_release); @@ -390,7 +390,7 @@ int gk20a_sched_dev_open(struct inode *inode, struct file *filp) gk20a_idle(g->dev); } - if (!mutex_trylock(&sched->busy_lock)) + if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) return -EBUSY; memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, @@ -506,11 +506,11 @@ int gk20a_sched_dev_release(struct inode *inode, struct file *filp) } /* unlock control */ - mutex_lock(&sched->control_lock); + nvgpu_mutex_acquire(&sched->control_lock); sched->control_locked = false; - mutex_unlock(&sched->control_lock); + nvgpu_mutex_release(&sched->control_lock); - mutex_unlock(&sched->busy_lock); + nvgpu_mutex_release(&sched->busy_lock); return 0; } @@ -530,16 +530,16 @@ static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) if (err) return err; - if (mutex_trylock(&sched->busy_lock)) { + if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { sched_busy = false; - mutex_unlock(&sched->busy_lock); + nvgpu_mutex_release(&sched->busy_lock); } seq_printf(s, "control_locked=%d\n", sched->control_locked); seq_printf(s, "busy=%d\n", sched_busy); seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); seq_puts(s, "active_tsg_bitmap\n"); for (i = 0; i < n; i++) @@ -549,7 +549,7 @@ static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) for (i = 0; i < n; i++) seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); gk20a_idle(g->dev); @@ -594,11 +594,11 @@ void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) gk20a_idle(g->dev); } - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap); NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap); sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN; - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); wake_up_interruptible(&sched->readout_wq); } @@ -608,7 +608,7 @@ void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg) gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - mutex_lock(&sched->status_lock); + nvgpu_mutex_acquire(&sched->status_lock); NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap); /* clear recent_tsg_bitmap as well: if app manager did not @@ -621,7 +621,7 @@ void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg) /* do not set event_pending, we only want to notify app manager * when TSGs are added, so that it can apply sched params */ - mutex_unlock(&sched->status_lock); + nvgpu_mutex_release(&sched->status_lock); } int gk20a_sched_ctrl_init(struct gk20a *g) @@ -652,9 +652,9 @@ int gk20a_sched_ctrl_init(struct gk20a *g) goto free_recent; 
init_waitqueue_head(&sched->readout_wq); - mutex_init(&sched->status_lock); - mutex_init(&sched->control_lock); - mutex_init(&sched->busy_lock); + nvgpu_mutex_init(&sched->status_lock); + nvgpu_mutex_init(&sched->control_lock); + nvgpu_mutex_init(&sched->busy_lock); sched->sw_ready = true; diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h index 0ae13783..1f983678 100644 --- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -21,11 +21,11 @@ struct tsg_gk20a; struct gk20a_sched_ctrl { struct gk20a *g; - struct mutex control_lock; + struct nvgpu_mutex control_lock; bool control_locked; bool sw_ready; - struct mutex status_lock; - struct mutex busy_lock; + struct nvgpu_mutex status_lock; + struct nvgpu_mutex busy_lock; u64 status; diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index edfe3deb..f57871d5 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c @@ -20,8 +20,7 @@ #include #include #include -#include - +#include #include #include @@ -55,7 +54,7 @@ struct gk20a_sync_pt { * than a mutex - there should be very little contention on this * lock. */ - spinlock_t lock; + struct nvgpu_spinlock lock; }; struct gk20a_sync_pt_inst { @@ -242,7 +241,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( } } - spin_lock_init(&shared->lock); + nvgpu_spinlock_init(&shared->lock); nvgpu_semaphore_get(sema); @@ -304,7 +303,7 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) #endif bool signaled = true; - spin_lock(&pt->lock); + nvgpu_spinlock_acquire(&pt->lock); if (!pt->sema) goto done; @@ -345,7 +344,7 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) pt->sema = NULL; } done: - spin_unlock(&pt->lock); + nvgpu_spinlock_release(&pt->lock); return signaled; } diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index 96d6873d..aadf5463 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c @@ -169,7 +169,7 @@ int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid) init_rwsem(&tsg->ch_list_lock); INIT_LIST_HEAD(&tsg->event_id_list); - mutex_init(&tsg->event_id_list_lock); + nvgpu_mutex_init(&tsg->event_id_list_lock); return 0; } @@ -204,7 +204,7 @@ static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, struct gk20a_event_id_data *local_event_id_data; bool event_found = false; - mutex_lock(&tsg->event_id_list_lock); + nvgpu_mutex_acquire(&tsg->event_id_list_lock); list_for_each_entry(local_event_id_data, &tsg->event_id_list, event_id_node) { if (local_event_id_data->event_id == event_id) { @@ -212,7 +212,7 @@ static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, break; } } - mutex_unlock(&tsg->event_id_list_lock); + nvgpu_mutex_release(&tsg->event_id_list_lock); if (event_found) { *event_id_data = local_event_id_data; @@ -233,7 +233,7 @@ void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, if (err) return; - mutex_lock(&event_id_data->lock); + nvgpu_mutex_acquire(&event_id_data->lock); gk20a_dbg_info( "posting event for event_id=%d on tsg=%d\n", @@ -242,7 +242,7 @@ void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, 
wake_up_interruptible_all(&event_id_data->event_id_wq); - mutex_unlock(&event_id_data->lock); + nvgpu_mutex_release(&event_id_data->lock); } static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, @@ -287,12 +287,12 @@ static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, event_id_data->event_id = event_id; init_waitqueue_head(&event_id_data->event_id_wq); - mutex_init(&event_id_data->lock); + nvgpu_mutex_init(&event_id_data->lock); INIT_LIST_HEAD(&event_id_data->event_id_node); - mutex_lock(&tsg->event_id_list_lock); + nvgpu_mutex_acquire(&tsg->event_id_list_lock); list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); - mutex_unlock(&tsg->event_id_list_lock); + nvgpu_mutex_release(&tsg->event_id_list_lock); fd_install(local_fd, file); file->private_data = event_id_data; @@ -370,9 +370,9 @@ int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice) static void release_used_tsg(struct fifo_gk20a *f, struct tsg_gk20a *tsg) { - mutex_lock(&f->tsg_inuse_mutex); + nvgpu_mutex_acquire(&f->tsg_inuse_mutex); f->tsg[tsg->tsgid].in_use = false; - mutex_unlock(&f->tsg_inuse_mutex); + nvgpu_mutex_release(&f->tsg_inuse_mutex); } static struct tsg_gk20a *acquire_unused_tsg(struct fifo_gk20a *f) @@ -380,7 +380,7 @@ static struct tsg_gk20a *acquire_unused_tsg(struct fifo_gk20a *f) struct tsg_gk20a *tsg = NULL; unsigned int tsgid; - mutex_lock(&f->tsg_inuse_mutex); + nvgpu_mutex_acquire(&f->tsg_inuse_mutex); for (tsgid = 0; tsgid < f->num_channels; tsgid++) { if (!f->tsg[tsgid].in_use) { f->tsg[tsgid].in_use = true; @@ -388,7 +388,7 @@ static struct tsg_gk20a *acquire_unused_tsg(struct fifo_gk20a *f) break; } } - mutex_unlock(&f->tsg_inuse_mutex); + nvgpu_mutex_release(&f->tsg_inuse_mutex); return tsg; } @@ -482,13 +482,13 @@ void gk20a_tsg_release(struct kref *ref) gk20a_sched_ctrl_tsg_removed(g, tsg); /* unhook all events created on this TSG */ - mutex_lock(&tsg->event_id_list_lock); + nvgpu_mutex_acquire(&tsg->event_id_list_lock); list_for_each_entry_safe(event_id_data, event_id_data_temp, &tsg->event_id_list, event_id_node) { list_del_init(&event_id_data->event_id_node); } - mutex_unlock(&tsg->event_id_list_lock); + nvgpu_mutex_release(&tsg->event_id_list_lock); release_used_tsg(&g->fifo, tsg); @@ -517,7 +517,7 @@ static int gk20a_tsg_ioctl_set_priority(struct gk20a *g, struct gk20a_sched_ctrl *sched = &g->sched_ctrl; int err; - mutex_lock(&sched->control_lock); + nvgpu_mutex_acquire(&sched->control_lock); if (sched->control_locked) { err = -EPERM; goto done; @@ -533,7 +533,7 @@ static int gk20a_tsg_ioctl_set_priority(struct gk20a *g, gk20a_idle(g->dev); done: - mutex_unlock(&sched->control_lock); + nvgpu_mutex_release(&sched->control_lock); return err; } @@ -545,7 +545,7 @@ static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - mutex_lock(&sched->control_lock); + nvgpu_mutex_acquire(&sched->control_lock); if (sched->control_locked) { err = -EPERM; goto done; @@ -560,7 +560,7 @@ static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, gk20a_idle(g->dev); done: - mutex_unlock(&sched->control_lock); + nvgpu_mutex_release(&sched->control_lock); return err; } @@ -572,7 +572,7 @@ static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - mutex_lock(&sched->control_lock); + nvgpu_mutex_acquire(&sched->control_lock); if (sched->control_locked) { err = -EPERM; goto done; @@ -585,7 +585,7 @@ static int 
gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); gk20a_idle(g->dev); done: - mutex_unlock(&sched->control_lock); + nvgpu_mutex_release(&sched->control_lock); return err; } diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index a6642682..f95ae008 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h @@ -16,6 +16,8 @@ #ifndef __TSG_GK20A_H_ #define __TSG_GK20A_H_ +#include + #define NVGPU_INVALID_TSG_ID (-1) struct channel_gk20a; @@ -58,7 +60,7 @@ struct tsg_gk20a { u32 interleave_level; struct list_head event_id_list; - struct mutex event_id_list_lock; + struct nvgpu_mutex event_id_list_lock; u32 runlist_id; pid_t tgid; diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 40a28136..c1cefc29 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -77,10 +77,10 @@ static get_ucode_details pmu_acr_supp_ucode_list[] = { static void start_gm20b_pmu(struct gk20a *g) { /*disable irqs for hs falcon booting as we will poll for halt*/ - mutex_lock(&g->pmu.isr_mutex); + nvgpu_mutex_acquire(&g->pmu.isr_mutex); pmu_enable_irq(&g->pmu, true); g->pmu.isr_enabled = true; - mutex_unlock(&g->pmu.isr_mutex); + nvgpu_mutex_release(&g->pmu.isr_mutex); gk20a_writel(g, pwr_falcon_cpuctl_alias_r(), pwr_falcon_cpuctl_startcpu_f(1)); } @@ -1282,10 +1282,10 @@ int gm20b_init_nspmu_setup_hw1(struct gk20a *g) gk20a_dbg_fn(""); - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); pmu_reset(pmu); pmu->isr_enabled = true; - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); /* setup apertures - virtual */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), @@ -1318,10 +1318,10 @@ static int gm20b_init_pmu_setup_hw1(struct gk20a *g, gk20a_dbg_fn(""); - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); g->ops.pmu.reset(g); pmu->isr_enabled = true; - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); /* setup apertures - virtual */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), @@ -1353,10 +1353,10 @@ static int gm20b_init_pmu_setup_hw1(struct gk20a *g, (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); /*disable irqs for hs falcon booting as we will poll for halt*/ - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); pmu_enable_irq(pmu, false); pmu->isr_enabled = false; - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); /*Clearing mailbox register used to reflect capabilities*/ gk20a_writel(g, pwr_falcon_mailbox1_r(), 0); err = bl_bootstrap(pmu, desc, bl_sz); diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.c b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c index 8db4944e..fc352151 100644 --- a/drivers/gpu/nvgpu/gm20b/clk_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c @@ -1191,7 +1191,7 @@ static int gm20b_init_clk_setup_sw(struct gk20a *g) } #endif - mutex_init(&clk->clk_mutex); + nvgpu_mutex_init(&clk->clk_mutex); clk->sw_ready = true; @@ -1212,10 +1212,10 @@ static int gm20b_clk_prepare(struct clk_hw *hw) struct clk_gk20a *clk = to_clk_gk20a(hw); int ret = 0; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); if (!clk->gpc_pll.enabled && clk->clk_hw_on) ret = set_pll_freq(clk->g, 1); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); return ret; } @@ -1223,10 +1223,10 @@ static void 
gm20b_clk_unprepare(struct clk_hw *hw) { struct clk_gk20a *clk = to_clk_gk20a(hw); - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); if (clk->gpc_pll.enabled && clk->clk_hw_on) clk_disable_gpcpll(clk->g, 1); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); } static int gm20b_clk_is_prepared(struct clk_hw *hw) @@ -1250,12 +1250,12 @@ static int gm20b_gpcclk_set_rate(struct clk_hw *hw, unsigned long rate, u32 old_freq; int ret = -ENODATA; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); old_freq = clk->gpc_pll.freq; ret = set_pll_target(clk->g, rate_gpu_to_gpc2clk(rate), old_freq); if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on) ret = set_pll_freq(clk->g, 1); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); return ret; } @@ -1272,7 +1272,7 @@ static long gm20b_round_rate(struct clk_hw *hw, unsigned long rate, if (rate > maxrate) rate = maxrate; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); freq = rate_gpu_to_gpc2clk(rate); if (freq > gpc_pll_params.max_freq) freq = gpc_pll_params.max_freq; @@ -1281,7 +1281,7 @@ static long gm20b_round_rate(struct clk_hw *hw, unsigned long rate, tmp_pll = clk->gpc_pll; clk_config_pll(clk, &tmp_pll, &gpc_pll_params, &freq, true); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); return rate_gpc2clk_to_gpu(tmp_pll.freq); } @@ -1445,14 +1445,14 @@ static int gm20b_clk_export_set_rate(void *data, unsigned long *rate) struct clk_gk20a *clk = &g->clk; if (rate) { - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); old_freq = clk->gpc_pll.freq; ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq); if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on) ret = set_pll_freq(g, 1); if (!ret) *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); } return ret; } @@ -1463,10 +1463,10 @@ static int gm20b_clk_export_enable(void *data) struct gk20a *g = data; struct clk_gk20a *clk = &g->clk; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); if (!clk->gpc_pll.enabled && clk->clk_hw_on) ret = set_pll_freq(g, 1); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); return ret; } @@ -1475,10 +1475,10 @@ static void gm20b_clk_export_disable(void *data) struct gk20a *g = data; struct clk_gk20a *clk = &g->clk; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); if (clk->gpc_pll.enabled && clk->clk_hw_on) clk_disable_gpcpll(g, 1); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); } static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state) @@ -1486,12 +1486,12 @@ static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state) struct gk20a *g = data; struct clk_gk20a *clk = &g->clk; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); if (state) *state = clk->gpc_pll.enabled; if (rate) *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); } static struct tegra_clk_export_ops gm20b_clk_export_ops = { @@ -1539,11 +1539,11 @@ static int gm20b_init_clk_support(struct gk20a *g) return err; #endif - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); clk->clk_hw_on = true; err = gm20b_init_clk_setup_hw(g); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); if (err) return err; @@ -1559,10 +1559,10 @@ static int 
gm20b_init_clk_support(struct gk20a *g) return err; /* The prev call may not enable PLL if gbus is unbalanced - force it */ - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); if (!clk->gpc_pll.enabled) err = set_pll_freq(g, 1); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); if (err) return err; @@ -1582,11 +1582,11 @@ static int gm20b_suspend_clk_support(struct gk20a *g) clk_disable_unprepare(g->clk.tegra_clk); /* The prev call may not disable PLL if gbus is unbalanced - force it */ - mutex_lock(&g->clk.clk_mutex); + nvgpu_mutex_acquire(&g->clk.clk_mutex); if (g->clk.gpc_pll.enabled) ret = clk_disable_gpcpll(g, 1); g->clk.clk_hw_on = false; - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return ret; } @@ -1616,11 +1616,11 @@ static int pll_reg_show(struct seq_file *s, void *data) struct gk20a *g = s->private; u32 reg, m, n, pl, f; - mutex_lock(&g->clk.clk_mutex); + nvgpu_mutex_acquire(&g->clk.clk_mutex); if (!g->clk.clk_hw_on) { seq_printf(s, "%s powered down - no access to registers\n", dev_name(dev_from_gk20a(g))); - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return 0; } @@ -1642,7 +1642,7 @@ static int pll_reg_show(struct seq_file *s, void *data) f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div(pl)); seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return 0; } @@ -1663,11 +1663,11 @@ static int pll_reg_raw_show(struct seq_file *s, void *data) struct gk20a *g = s->private; u32 reg; - mutex_lock(&g->clk.clk_mutex); + nvgpu_mutex_acquire(&g->clk.clk_mutex); if (!g->clk.clk_hw_on) { seq_printf(s, "%s powered down - no access to registers\n", dev_name(dev_from_gk20a(g))); - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return 0; } @@ -1685,7 +1685,7 @@ static int pll_reg_raw_show(struct seq_file *s, void *data) reg = trim_sys_bypassctrl_r(); seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return 0; } @@ -1722,13 +1722,13 @@ static ssize_t pll_reg_raw_write(struct file *file, (reg != trim_sys_bypassctrl_r())) return -EPERM; - mutex_lock(&g->clk.clk_mutex); + nvgpu_mutex_acquire(&g->clk.clk_mutex); if (!g->clk.clk_hw_on) { - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return -EBUSY; } gk20a_writel(g, reg, val); - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); return count; } @@ -1755,7 +1755,7 @@ static int monitor_get(void *data, u64 *val) if (err) return err; - mutex_lock(&g->clk.clk_mutex); + nvgpu_mutex_acquire(&g->clk.clk_mutex); /* Disable clock slowdown during measurements */ clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0)); @@ -1787,7 +1787,7 @@ static int monitor_get(void *data, u64 *val) /* Restore clock slowdown */ gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save); - mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); gk20a_idle(g->dev); @@ -1811,14 +1811,14 @@ static int voltage_get(void *data, u64 *val) if (err) return err; - mutex_lock(&g->clk.clk_mutex); + nvgpu_mutex_acquire(&g->clk.clk_mutex); det_out = gk20a_readl(g, trim_sys_gpcpll_cfg3_r()); det_out = trim_sys_gpcpll_cfg3_dfs_testout_v(det_out); *val = div64_u64((u64)det_out * gpc_pll_params.uvdet_slope + gpc_pll_params.uvdet_offs, 1000ULL); - 
mutex_unlock(&g->clk.clk_mutex); + nvgpu_mutex_release(&g->clk.clk_mutex); gk20a_idle(g->dev); return 0; diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.h b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h index 7ea84826..5746165e 100644 --- a/drivers/gpu/nvgpu/gm20b/clk_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h @@ -1,7 +1,7 @@ /* * GM20B Graphics * - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -19,7 +19,7 @@ #ifndef _NVHOST_CLK_GM20B_H_ #define _NVHOST_CLK_GM20B_H_ -#include +#include void gm20b_init_clk_ops(struct gpu_ops *gops); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 0375d71f..9cf644fd 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -1247,7 +1247,7 @@ static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) GPU_LIT_TPC_IN_GPC_STRIDE); u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, gr_gpc0_tpc0_sm_cfg_r() + offset)); @@ -1263,7 +1263,7 @@ static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return 0; } @@ -1280,7 +1280,7 @@ static int gm20b_gr_update_sm_error_state(struct gk20a *g, GPU_LIT_TPC_IN_GPC_STRIDE); int err = 0; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); gr->sm_error_states[sm_id].hww_global_esr = sm_error_state->hww_global_esr; @@ -1336,7 +1336,7 @@ enable_ctxsw: err = gr_gk20a_enable_ctxsw(g); fail: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } @@ -1351,7 +1351,7 @@ static int gm20b_gr_clear_sm_error_state(struct gk20a *g, GPU_LIT_TPC_IN_GPC_STRIDE); int err = 0; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); @@ -1377,7 +1377,7 @@ static int gm20b_gr_clear_sm_error_state(struct gk20a *g, err = gr_gk20a_enable_ctxsw(g); fail: - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return err; } diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 3324d3df..11258032 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c @@ -120,7 +120,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, if (gr->compbit_store.mem.size == 0) return 0; - mutex_lock(&g->mm.l2_op_lock); + nvgpu_mutex_acquire(&g->mm.l2_op_lock); if (op == gk20a_cbc_op_clear) { gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), @@ -163,7 +163,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, } out: trace_gk20a_ltc_cbc_ctrl_done(dev_name(g->dev)); - mutex_unlock(&g->mm.l2_op_lock); + nvgpu_mutex_release(&g->mm.l2_op_lock); return err; } diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c index edeb97ec..3b9c444d 100644 --- a/drivers/gpu/nvgpu/gp106/clk_gp106.c +++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c @@ -1,7 +1,7 @@ /* * GP106 Clocks * - * Copyright 
(c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -79,7 +79,7 @@ static int gp106_init_clk_support(struct gk20a *g) { gk20a_dbg_fn(""); - mutex_init(&clk->clk_mutex); + nvgpu_mutex_init(&clk->clk_mutex); clk->clk_namemap = (struct namemap_cfg *) kzalloc(sizeof(struct namemap_cfg) * NUM_NAMEMAPS, GFP_KERNEL); @@ -169,7 +169,7 @@ static u32 gp106_get_rate_cntr(struct gk20a *g, struct namemap_cfg *c) { if (!c || !c->cntr.reg_ctrl_addr || !c->cntr.reg_cntr_addr) return 0; - mutex_lock(&clk->clk_mutex); + nvgpu_mutex_acquire(&clk->clk_mutex); /* Save the register */ save_reg = gk20a_readl(g, c->cntr.reg_ctrl_addr); @@ -216,7 +216,7 @@ read_err: gk20a_readl(g, c->cntr.reg_ctrl_addr); gk20a_writel(g, c->cntr.reg_ctrl_addr, save_reg); gk20a_readl(g, c->cntr.reg_ctrl_addr); - mutex_unlock(&clk->clk_mutex); + nvgpu_mutex_release(&clk->clk_mutex); return cntr; diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.h b/drivers/gpu/nvgpu/gp106/clk_gp106.h index 7df4b974..3c2e31d1 100644 --- a/drivers/gpu/nvgpu/gp106/clk_gp106.h +++ b/drivers/gpu/nvgpu/gp106/clk_gp106.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -16,7 +16,7 @@ #ifndef CLK_GP106_H #define CLK_GP106_H -#include +#include #define CLK_NAMEMAP_INDEX_GPC2CLK 0x00 #define CLK_NAMEMAP_INDEX_XBAR2CLK 0x02 diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c index 9af16886..51e76605 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c @@ -114,7 +114,7 @@ void sec2_copy_to_dmem(struct pmu_gk20a *pmu, return; } - mutex_lock(&pmu->pmu_copy_lock); + nvgpu_mutex_acquire(&pmu->pmu_copy_lock); words = size >> 2; bytes = size & 0x3; @@ -144,7 +144,7 @@ void sec2_copy_to_dmem(struct pmu_gk20a *pmu, "copy failed. 
bytes written %d, expected %d", data - dst, size); } - mutex_unlock(&pmu->pmu_copy_lock); + nvgpu_mutex_release(&pmu->pmu_copy_lock); return; } @@ -348,10 +348,10 @@ int init_sec2_setup_hw1(struct gk20a *g, gk20a_dbg_fn(""); - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); g->ops.pmu.reset(g); pmu->isr_enabled = true; - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); data = gk20a_readl(g, psec_fbif_ctl_r()); data |= psec_fbif_ctl_allow_phys_no_ctx_allow_f(); @@ -379,11 +379,11 @@ int init_sec2_setup_hw1(struct gk20a *g, psec_fbif_transcfg_target_noncoherent_sysmem_f()); /*disable irqs for hs falcon booting as we will poll for halt*/ - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); pmu_enable_irq(pmu, false); sec_enable_irq(pmu, false); pmu->isr_enabled = false; - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); err = bl_bootstrap_sec2(pmu, desc, bl_sz); if (err) return err; diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index e680e753..da121b56 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -1997,16 +1997,16 @@ static int gr_gp10b_suspend_contexts(struct gk20a *g, int local_ctx_resident_ch_fd = -1; bool ctx_resident; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); err = gr_gk20a_disable_ctxsw(g); if (err) { gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw"); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); goto clean_up; } - mutex_lock(&dbg_s->ch_list_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) { ch = g->fifo.channel + ch_data->chid; @@ -2019,15 +2019,15 @@ static int gr_gp10b_suspend_contexts(struct gk20a *g, cilp_preempt_pending_ch = ch; } - mutex_unlock(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&dbg_s->ch_list_lock); err = gr_gk20a_enable_ctxsw(g); if (err) { - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); goto clean_up; } - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); if (cilp_preempt_pending_ch) { struct channel_ctx_gk20a *ch_ctx = diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c index b989e6a4..9c565729 100644 --- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c @@ -306,10 +306,10 @@ static int gp10b_init_pmu_setup_hw1(struct gk20a *g) gk20a_dbg_fn(""); - mutex_lock(&pmu->isr_mutex); + nvgpu_mutex_acquire(&pmu->isr_mutex); pmu_reset(pmu); pmu->isr_enabled = true; - mutex_unlock(&pmu->isr_mutex); + nvgpu_mutex_release(&pmu->isr_mutex); /* setup apertures - virtual */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h index d5a90c87..16fe2641 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h +++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -20,6 +20,7 @@ #include #include #include +#include /* #define ALLOCATOR_DEBUG */ @@ -78,7 +79,7 @@ struct nvgpu_allocator_ops { struct nvgpu_allocator { char name[32]; - struct mutex lock; + struct nvgpu_mutex lock; void *priv; const struct nvgpu_allocator_ops *ops; @@ -167,12 +168,12 @@ struct nvgpu_alloc_carveout { static inline void alloc_lock(struct nvgpu_allocator *a) { - mutex_lock(&a->lock); + nvgpu_mutex_acquire(&a->lock); } static inline void alloc_unlock(struct nvgpu_allocator *a) { - mutex_unlock(&a->lock); + nvgpu_mutex_release(&a->lock); } /* diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index cc4921d3..6f479383 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h @@ -84,7 +84,7 @@ struct nvgpu_semaphore_pool { struct nvgpu_semaphore_sea *sema_sea; /* Sea that owns this pool. */ - struct mutex pool_lock; + struct nvgpu_mutex pool_lock; /* * This is the address spaces's personal RW table. Other channels will @@ -143,7 +143,7 @@ struct nvgpu_semaphore_sea { */ DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT); - struct mutex sea_lock; /* Lock alloc/free calls. */ + struct nvgpu_mutex sea_lock; /* Lock alloc/free calls. */ }; /* diff --git a/drivers/gpu/nvgpu/lpwr/lpwr.c b/drivers/gpu/nvgpu/lpwr/lpwr.c index 9636891b..b722a900 100644 --- a/drivers/gpu/nvgpu/lpwr/lpwr.c +++ b/drivers/gpu/nvgpu/lpwr/lpwr.c @@ -346,7 +346,7 @@ int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock) if (pstate_lock) nvgpu_clk_arb_pstate_change_lock(g, true); - mutex_lock(&pmu->pg_mutex); + nvgpu_mutex_acquire(&pmu->pg_mutex); present_pstate = nvgpu_clk_arb_get_current_pstate(g); @@ -367,7 +367,7 @@ int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock) status = gk20a_pmu_enable_elpg(g); } - mutex_unlock(&pmu->pg_mutex); + nvgpu_mutex_release(&pmu->pg_mutex); if (pstate_lock) nvgpu_clk_arb_pstate_change_lock(g, false); @@ -386,7 +386,7 @@ int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock) if (pstate_lock) nvgpu_clk_arb_pstate_change_lock(g, true); - mutex_lock(&pmu->pg_mutex); + nvgpu_mutex_acquire(&pmu->pg_mutex); present_pstate = nvgpu_clk_arb_get_current_pstate(g); @@ -411,7 +411,7 @@ int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock) } exit_unlock: - mutex_unlock(&pmu->pg_mutex); + nvgpu_mutex_release(&pmu->pg_mutex); if (pstate_lock) nvgpu_clk_arb_pstate_change_lock(g, false); diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c index feb8cca8..dcb8464c 100644 --- a/drivers/gpu/nvgpu/pstate/pstate.c +++ b/drivers/gpu/nvgpu/pstate/pstate.c @@ -1,7 +1,7 @@ /* * general p state infrastructure * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -333,7 +333,7 @@ static int pstate_sw_setup(struct gk20a *g) gk20a_dbg_fn(""); init_waitqueue_head(&g->perf_pmu.pstatesobjs.pstate_notifier_wq); - mutex_init(&g->perf_pmu.pstatesobjs.pstate_mutex); + nvgpu_mutex_init(&g->perf_pmu.pstatesobjs.pstate_mutex); err = boardobjgrpconstruct_e32(&g->perf_pmu.pstatesobjs.super); if (err) { diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h index af0956e8..22ba98b9 100644 --- a/drivers/gpu/nvgpu/pstate/pstate.h +++ b/drivers/gpu/nvgpu/pstate/pstate.h @@ -1,7 +1,7 @@ /* * general p state infrastructure * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -50,7 +50,7 @@ struct pstates { u32 num_levels; wait_queue_head_t pstate_notifier_wq; u32 is_pstate_switch_on; - struct mutex pstate_mutex; /* protect is_pstate_switch_on */ + struct nvgpu_mutex pstate_mutex; /* protect is_pstate_switch_on */ }; int gk20a_init_pstate_support(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 339f2237..7b6ed322 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -1,7 +1,7 @@ /* * Virtualized GPU Fifo * - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -218,7 +218,7 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) goto clean_up_runlist; } } - mutex_init(&runlist->mutex); + nvgpu_mutex_init(&runlist->mutex); /* None of buffers is pinned if this value doesn't change. Otherwise, one of them (cur_buffer) must have been pinned. 
*/ @@ -294,7 +294,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) init_runlist(g, f); INIT_LIST_HEAD(&f->free_chs); - mutex_init(&f->free_chs_mutex); + nvgpu_mutex_init(&f->free_chs_mutex); for (chid = 0; chid < f->num_channels; chid++) { f->channel[chid].userd_iova = @@ -306,10 +306,10 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) gk20a_init_channel_support(g, chid); gk20a_init_tsg_support(g, chid); } - mutex_init(&f->tsg_inuse_mutex); + nvgpu_mutex_init(&f->tsg_inuse_mutex); f->deferred_reset_pending = false; - mutex_init(&f->deferred_reset_mutex); + nvgpu_mutex_init(&f->deferred_reset_mutex); f->sw_ready = true; @@ -534,12 +534,12 @@ static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id, runlist = &f->runlist_info[runlist_id]; - mutex_lock(&runlist->mutex); + nvgpu_mutex_acquire(&runlist->mutex); ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add, wait_for_finish); - mutex_unlock(&runlist->mutex); + nvgpu_mutex_release(&runlist->mutex); return ret; } @@ -679,7 +679,7 @@ static int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, struct channel_gk20a *ch) { - mutex_lock(&ch->error_notifier_mutex); + nvgpu_mutex_acquire(&ch->error_notifier_mutex); if (ch->error_notifier_ref) { if (ch->error_notifier->status == 0xffff) { /* If error code is already set, this mmu fault @@ -691,7 +691,7 @@ static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT); } } - mutex_unlock(&ch->error_notifier_mutex); + nvgpu_mutex_release(&ch->error_notifier_mutex); /* mark channel as faulted */ ch->has_timedout = true; diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 36bbee00..5c637d25 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -866,7 +866,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) gr->g = g; #if defined(CONFIG_GK20A_CYCLE_STATS) - mutex_init(&g->gr.cs_lock); + nvgpu_mutex_init(&g->gr.cs_lock); #endif err = vgpu_gr_init_gr_config(g, gr); @@ -885,7 +885,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) if (err) goto clean_up; - mutex_init(&gr->ctx_mutex); + nvgpu_mutex_init(&gr->ctx_mutex); gr->sm_error_states = kzalloc( sizeof(struct nvgpu_dbg_gpu_sm_error_state_record) * @@ -1078,9 +1078,9 @@ static int vgpu_gr_clear_sm_error_state(struct gk20a *g, { struct gr_gk20a *gr = &g->gr; - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); return 0; } @@ -1096,7 +1096,7 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g, return; } - mutex_lock(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&g->dbg_sessions_lock); sm_error_states = &g->gr.sm_error_states[info->sm_id]; @@ -1108,7 +1108,7 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g, sm_error_states->hww_warp_esr_report_mask = info->hww_warp_esr_report_mask; - mutex_unlock(&g->dbg_sessions_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); } void vgpu_init_gr_ops(struct gpu_ops *gops) diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index c6780cf7..70b50e7e 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -200,7 +200,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) int err; gk20a_dbg_fn(""); - mutex_lock(&vm->update_gmmu_lock); + 
nvgpu_mutex_acquire(&vm->update_gmmu_lock); /* TBD: add a flag here for the unmap code to recognize teardown * and short-circuit any otherwise expensive operations. */ @@ -231,7 +231,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) if (nvgpu_alloc_initialized(&vm->user)) nvgpu_alloc_destroy(&vm->user); - mutex_unlock(&vm->update_gmmu_lock); + nvgpu_mutex_release(&vm->update_gmmu_lock); /* vm is not used anymore. release it. */ kfree(vm); @@ -401,7 +401,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, vm->mapped_buffers = RB_ROOT; - mutex_init(&vm->update_gmmu_lock); + nvgpu_mutex_init(&vm->update_gmmu_lock); kref_init(&vm->ref); INIT_LIST_HEAD(&vm->reserved_va_list); diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index ba387de9..4c88ab96 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -250,9 +250,9 @@ static int vgpu_init_support(struct platform_device *pdev) } g->bar1_mem = r; - mutex_init(&g->dbg_sessions_lock); - mutex_init(&g->client_lock); - mutex_init(&g->ch_wdt_lock); + nvgpu_mutex_init(&g->dbg_sessions_lock); + nvgpu_mutex_init(&g->client_lock); + nvgpu_mutex_init(&g->ch_wdt_lock); g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL); if (!g->dbg_regops_tmp_buf) { @@ -576,7 +576,7 @@ int vgpu_probe(struct platform_device *pdev) init_rwsem(&gk20a->busy_lock); - spin_lock_init(&gk20a->mc_enable_lock); + nvgpu_spinlock_init(&gk20a->mc_enable_lock); /* Initialize the platform interface. */ err = platform->probe(dev); -- cgit v1.2.2
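
Every hunk above follows the same call-site shape: nvgpu_mutex_init()/nvgpu_mutex_acquire()/nvgpu_mutex_release() (and the nvgpu_spinlock_* equivalents) replace mutex_init()/mutex_lock()/mutex_unlock() and the spin_lock_* calls one-for-one, so the structure of each critical section is unchanged; only the lock type and the verbs change. The sketch below is a minimal user-space model of that pattern, not the kernel implementation: the real types live in <nvgpu/lock.h> and wrap the Linux primitives, whereas here they are backed by pthreads purely so the pattern compiles standalone. The example_clk structure and example_set_rate() helper are hypothetical, loosely shaped after the clk_gk20a clk_mutex usage converted above.

/*
 * Illustrative sketch only (assumed pthread backing; not the nvgpu kernel code).
 * Build: cc -pthread example.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct nvgpu_mutex    { pthread_mutex_t m; };
struct nvgpu_spinlock { pthread_spinlock_t s; };

static inline void nvgpu_mutex_init(struct nvgpu_mutex *mutex)    { pthread_mutex_init(&mutex->m, NULL); }
static inline void nvgpu_mutex_acquire(struct nvgpu_mutex *mutex) { pthread_mutex_lock(&mutex->m); }
static inline void nvgpu_mutex_release(struct nvgpu_mutex *mutex) { pthread_mutex_unlock(&mutex->m); }

static inline void nvgpu_spinlock_init(struct nvgpu_spinlock *l)    { pthread_spin_init(&l->s, PTHREAD_PROCESS_PRIVATE); }
static inline void nvgpu_spinlock_acquire(struct nvgpu_spinlock *l) { pthread_spin_lock(&l->s); }
static inline void nvgpu_spinlock_release(struct nvgpu_spinlock *l) { pthread_spin_unlock(&l->s); }

/* Hypothetical stand-in for clk_gk20a: a mutex protecting PLL soft state. */
struct example_clk {
	struct nvgpu_mutex clk_mutex;
	bool pll_enabled;
	bool clk_hw_on;
	unsigned long freq_khz;
};

/* Call-site shape after the conversion: acquire/release around the critical section. */
static int example_set_rate(struct example_clk *clk, unsigned long rate_khz)
{
	int ret = 0;

	nvgpu_mutex_acquire(&clk->clk_mutex);
	if (clk->pll_enabled && clk->clk_hw_on)
		clk->freq_khz = rate_khz;   /* the driver would reprogram the PLL here */
	else
		ret = -1;                   /* hardware not up; nothing to program */
	nvgpu_mutex_release(&clk->clk_mutex);

	return ret;
}

int main(void)
{
	struct example_clk clk = { .pll_enabled = true, .clk_hw_on = true };
	struct nvgpu_spinlock stats_lock;   /* analogous to mc_enable_lock above */
	unsigned int rate_changes = 0;
	int ret;

	nvgpu_mutex_init(&clk.clk_mutex);
	nvgpu_spinlock_init(&stats_lock);

	ret = example_set_rate(&clk, 998400);

	nvgpu_spinlock_acquire(&stats_lock);
	rate_changes++;
	nvgpu_spinlock_release(&stats_lock);

	printf("set_rate -> %d, freq = %lu kHz, changes = %u\n",
	       ret, clk.freq_khz, rate_changes);
	return 0;
}

Because the wrapper API mirrors the Linux one call for call, a conversion like this patch can be done mechanically at each call site while the shared definition of the lock types moves into common nvgpu headers.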