From 24e1c7e0a729158be36d63b821550d206a8a0436 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Wed, 8 Mar 2017 17:08:50 -0800
Subject: gpu: nvgpu: Use new kmem API functions (misc)

Use the new kmem API functions in misc gk20a code. Some additional
modifications were also made:

  o Add a struct gk20a pointer to gk20a_fence to enable proper kmem
    free usage.
  o Add a gk20a pointer to alloc_session() in dbg_gpu_gk20a.c to use
    the kmem API for allocating a session.
  o Plumb a gk20a pointer through the fence creation and deletion.
  o Use statically allocated buffers for names in file creation.

Bug 1799159
Bug 1823380

Change-Id: I3678080e3ffa1f9bcf6934e3f4819a1bc531689b
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1318323
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c          | 19 ++++++------
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c          |  6 ++--
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c |  6 ++--
 drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c       | 12 ++++----
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c         | 44 +++++++++++++---------------
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c  | 11 ++++---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c      | 22 +++++++-------
 drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c   | 15 ++++++----
 drivers/gpu/nvgpu/gk20a/fence_gk20a.c        | 20 ++++++++-----
 drivers/gpu/nvgpu/gk20a/fence_gk20a.h        |  3 ++
 drivers/gpu/nvgpu/gk20a/gk20a_scale.c        |  8 +++--
 drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c        |  4 ++-
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c          | 32 ++++++++++----------
 drivers/gpu/nvgpu/gk20a/sync_gk20a.c         | 18 ++++++++----
 drivers/gpu/nvgpu/gk20a/sync_gk20a.h         |  4 ++-
 15 files changed, 128 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 5b8ee642..02636206 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -26,6 +26,7 @@
 #include
 #include
+#include
 
 #include "gk20a.h"
 #include "channel_gk20a.h"
@@ -55,7 +56,7 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
 		gk20a_gmmu_unmap_free(cde_ctx->vm, mem);
 	}
 
-	kfree(cde_ctx->init_convert_cmd);
+	nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd);
 
 	cde_ctx->convert_cmd = NULL;
 	cde_ctx->init_convert_cmd = NULL;
@@ -86,7 +87,7 @@ __must_hold(&cde_app->mutex)
 	/* housekeeping on app */
 	list_del(&cde_ctx->list);
 	cde_ctx->g->cde_app.ctx_count--;
-	kfree(cde_ctx);
+	nvgpu_kfree(g, cde_ctx);
 }
 
 static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
@@ -535,8 +536,8 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
 	}
 
 	/* allocate gpfifo entries to be pushed */
-	*gpfifo = kzalloc(sizeof(struct nvgpu_gpfifo) * num_elems,
-			GFP_KERNEL);
+	*gpfifo = nvgpu_kzalloc(cde_ctx->g,
+			sizeof(struct nvgpu_gpfifo) * num_elems);
 	if (!*gpfifo) {
 		gk20a_warn(cde_ctx->dev, "cde: could not allocate memory for gpfifo entries");
 		return -ENOMEM;
@@ -588,7 +589,7 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
 	struct nvgpu_gpfifo *combined_cmd;
 
 	/* allocate buffer that has space for both */
-	combined_cmd = kzalloc(total_bytes, GFP_KERNEL);
+	combined_cmd = nvgpu_kzalloc(cde_ctx->g, total_bytes);
 	if (!combined_cmd) {
 		gk20a_warn(cde_ctx->dev,
 			"cde: could not allocate memory for gpfifo entries");
@@ -600,8 +601,8 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
 	memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
 			cde_ctx->convert_cmd, conv_bytes);
 
-	kfree(cde_ctx->init_convert_cmd);
-	kfree(cde_ctx->convert_cmd);
+	nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd);
+	nvgpu_kfree(cde_ctx->g, cde_ctx->convert_cmd);
 
 	cde_ctx->init_convert_cmd = combined_cmd;
 	cde_ctx->convert_cmd = combined_cmd
@@ -893,7 +894,7 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
 	struct gk20a_cde_ctx *cde_ctx;
 	int ret;
 
-	cde_ctx = kzalloc(sizeof(*cde_ctx), GFP_KERNEL);
+	cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
 	if (!cde_ctx)
 		return ERR_PTR(-ENOMEM);
 
@@ -902,7 +903,7 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
 
 	ret = gk20a_cde_load(cde_ctx);
 	if (ret) {
-		kfree(cde_ctx);
+		nvgpu_kfree(g, cde_ctx);
 		return ERR_PTR(ret);
 	}
 
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 418572a1..3ca38715 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -26,6 +26,8 @@
 #include
 #include
 
+#include
+
 #include "gk20a.h"
 #include "debug_gk20a.h"
@@ -207,7 +209,7 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
 	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
 	nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex);
 
-	kfree(ce_ctx);
+	nvgpu_kfree(ce_ctx->g, ce_ctx);
 }
 
 static inline unsigned int gk20a_ce_get_method_size(int request_operation)
@@ -428,7 +430,7 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev,
 	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
 		return ctx_id;
 
-	ce_ctx = kzalloc(sizeof(*ce_ctx), GFP_KERNEL);
+	ce_ctx = nvgpu_kzalloc(g, sizeof(*ce_ctx));
 	if (!ce_ctx)
 		return ctx_id;
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index e84f70a2..8baf60dd 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -667,7 +667,7 @@ static int gk20a_channel_semaphore_wait_fd(
 	ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
 	if (ret == 0) {
 		if (fp_sema) {
-			err = gk20a_fence_from_semaphore(fence,
+			err = gk20a_fence_from_semaphore(c->g, fence,
 					sema->timeline,
 					fp_sema,
 					&c->semaphore_wq,
@@ -734,7 +734,7 @@ static int gk20a_channel_semaphore_wait_fd(
 	 * that we properly clean up in the event the sync_fence has
 	 * already signaled
 	 */
-	err = gk20a_fence_from_semaphore(fence, sema->timeline, w->sema,
+	err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
 			&c->semaphore_wq, NULL, false, false);
 	if (err)
 		goto clean_up_sema;
@@ -810,7 +810,7 @@ static int __gk20a_channel_semaphore_incr(
 	/* Release the completion semaphore. */
 	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
 
-	err = gk20a_fence_from_semaphore(fence,
+	err = gk20a_fence_from_semaphore(c->g, fence,
 			sp->timeline, semaphore,
 			&c->semaphore_wq,
 			dependency, wfi_cmd,
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 4bc7ee52..53d5f78d 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -19,9 +19,11 @@
 #include
 #include
 #include
-#include
 #include
 
+#include
+#include
+
 #include "gk20a.h"
 #include "css_gr_gk20a.h"
@@ -112,7 +114,7 @@ static int css_gr_create_shared_data(struct gr_gk20a *gr)
 	if (gr->cs_data)
 		return 0;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = nvgpu_kzalloc(gr->g, sizeof(*data));
 	if (!data)
 		return -ENOMEM;
 
@@ -234,7 +236,7 @@ static void css_gr_free_shared_data(struct gr_gk20a *gr)
 		g->ops.css.disable_snapshot(gr);
 
 		/* release the objects */
-		kfree(gr->cs_data);
+		nvgpu_kfree(gr->g, gr->cs_data);
 		gr->cs_data = NULL;
 	}
 }
@@ -458,7 +460,7 @@ static int css_gr_free_client_data(struct gk20a *g,
 		dma_buf_put(client->dma_handler);
 	}
 
-	kfree(client);
+	nvgpu_kfree(g, client);
 
 	return ret;
 }
@@ -471,7 +473,7 @@ static int css_gr_create_client_data(struct gk20a *g,
 	struct gk20a_cs_snapshot_client *cur;
 	int ret = 0;
 
-	cur = kzalloc(sizeof(*cur), GFP_KERNEL);
+	cur = nvgpu_kzalloc(g, sizeof(*cur));
 	if (!cur) {
 		ret = -ENOMEM;
 		goto failed;
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 5661b402..fba39a50 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -23,6 +23,8 @@
 #include
 #include
 
+#include
+
 #include "gk20a.h"
 #include "fence_gk20a.h"
@@ -52,7 +54,7 @@ int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 	if (!g)
 		return -ENODEV;
 
-	priv = kzalloc(sizeof(struct gk20a_ctrl_priv), GFP_KERNEL);
+	priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv));
 	if (!priv) {
 		err = -ENOMEM;
 		goto free_ref;
@@ -94,7 +96,7 @@ int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
 #endif
 
 	gk20a_put(g);
-	kfree(priv);
+	nvgpu_kfree(g, priv);
 
 	return 0;
 }
@@ -195,18 +197,16 @@ static int gk20a_ctrl_alloc_as(
 	int err;
 	int fd;
 	struct file *file;
-	char *name;
+	char name[64];
 
 	err = get_unused_fd_flags(O_RDWR);
 	if (err < 0)
 		return err;
 	fd = err;
 
-	name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
-			g->name, fd);
+	snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd);
 
 	file = anon_inode_getfile(name, g->as.cdev.ops, NULL, O_RDWR);
-	kfree(name);
 	if (IS_ERR(file)) {
 		err = PTR_ERR(file);
 		goto clean_up;
@@ -236,18 +236,16 @@ static int gk20a_ctrl_open_tsg(struct gk20a *g,
 	int err;
 	int fd;
 	struct file *file;
-	char *name;
+	char name[64];
 
 	err = get_unused_fd_flags(O_RDWR);
 	if (err < 0)
 		return err;
 	fd = err;
 
-	name = kasprintf(GFP_KERNEL, "nvgpu-%s-tsg%d",
-			g->name, fd);
+	snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd);
 
 	file = anon_inode_getfile(name, g->tsg.cdev.ops, NULL, O_RDWR);
-	kfree(name);
 	if (IS_ERR(file)) {
 		err = PTR_ERR(file);
 		goto clean_up;
@@ -407,7 +405,7 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 	sm_count = g->gr.gpc_count * g->gr.tpc_count;
 	size = sm_count * sizeof(struct warpstate);
-	w_state = kzalloc(size, GFP_KERNEL);
+	w_state = nvgpu_kzalloc(g, size);
 	if (!w_state)
 		return -ENOMEM;
 
@@ -421,7 +419,7 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 	}
 
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
-	kfree(w_state);
+	nvgpu_kfree(g, w_state);
 	return err;
 }
@@ -473,7 +471,7 @@ static int gk20a_ctrl_vsm_mapping(struct gk20a *g,
 	struct nvgpu_gpu_vsms_mapping_entry *vsms_buf;
 	u32 i;
 
-	vsms_buf = kzalloc(write_size, GFP_KERNEL);
+	vsms_buf = nvgpu_kzalloc(g, write_size);
 	if (vsms_buf == NULL)
 		return -ENOMEM;
 
@@ -485,7 +483,7 @@ static int gk20a_ctrl_vsm_mapping(struct gk20a *g,
 	err = copy_to_user((void __user *)(uintptr_t)
 			args->vsms_map_buf_addr,
 			vsms_buf, write_size);
-	kfree(vsms_buf);
+	nvgpu_kfree(g, vsms_buf);
 
 	return err;
 }
@@ -760,7 +758,7 @@ static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
 	if (err)
 		return err;
 
-	fpoints = kcalloc(max_points, sizeof(u16), GFP_KERNEL);
+	fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16));
 	if (!fpoints)
 		return -ENOMEM;
 
@@ -797,7 +795,7 @@ static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
 	args->num_entries = num_points;
 
 fail:
-	kfree(fpoints);
+	nvgpu_kfree(g, fpoints);
 	return err;
 }
@@ -1245,13 +1243,13 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args));
 
-		zcull_info = kzalloc(sizeof(struct gr_zcull_info), GFP_KERNEL);
+		zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info));
 		if (zcull_info == NULL)
 			return -ENOMEM;
 
 		err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);
 		if (err) {
-			kfree(zcull_info);
+			nvgpu_kfree(g, zcull_info);
 			break;
 		}
 
@@ -1266,12 +1264,12 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		get_info_args->subregion_height_align_pixels =
 			zcull_info->subregion_height_align_pixels;
 		get_info_args->subregion_count = zcull_info->subregion_count;
 
-		kfree(zcull_info);
+		nvgpu_kfree(g, zcull_info);
 		break;
 	case NVGPU_GPU_IOCTL_ZBC_SET_TABLE:
 		set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf;
 
-		zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL);
+		zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry));
 		if (zbc_val == NULL)
 			return -ENOMEM;
 
@@ -1303,12 +1301,12 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		}
 
 		if (zbc_val)
-			kfree(zbc_val);
+			nvgpu_kfree(g, zbc_val);
 		break;
 	case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE:
 		query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf;
 
-		zbc_tbl = kzalloc(sizeof(struct zbc_query_params), GFP_KERNEL);
+		zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params));
 		if (zbc_tbl == NULL)
 			return -ENOMEM;
 
@@ -1342,7 +1340,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		}
 
 		if (zbc_tbl)
-			kfree(zbc_tbl);
+			nvgpu_kfree(g, zbc_tbl);
 		break;
 	case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS:
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index c75689f2..1c3ba9c2 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -24,6 +24,9 @@
 #include
 #include
 #include
+
+#include
+
 #include "ctxsw_trace_gk20a.h"
 #include "gk20a.h"
 #include "gr_gk20a.h"
@@ -196,7 +199,7 @@ static int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
 {
 	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
 
-	vfree(dev->hdr);
+	nvgpu_vfree(g, dev->hdr);
 	return 0;
 }
 
@@ -516,7 +519,7 @@ int gk20a_ctxsw_trace_init(struct gk20a *g)
 	if (likely(trace))
 		return 0;
 
-	trace = kzalloc(sizeof(*trace), GFP_KERNEL);
+	trace = nvgpu_kzalloc(g, sizeof(*trace));
 	if (unlikely(!trace))
 		return -ENOMEM;
 	g->ctxsw_trace = trace;
@@ -533,7 +536,7 @@ int gk20a_ctxsw_trace_init(struct gk20a *g)
 
 fail:
 	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
-	kfree(trace);
+	nvgpu_kfree(g, trace);
 	g->ctxsw_trace = NULL;
 	return err;
 #else
@@ -559,7 +562,7 @@ void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
 		dev++;
 	}
-	kfree(g->ctxsw_trace);
+	nvgpu_kfree(g, g->ctxsw_trace);
 	g->ctxsw_trace = NULL;
 
 	g->ops.fecs_trace.deinit(g);
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index b5098849..7054e2a2 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -69,14 +69,14 @@ static int generate_unique_id(void)
 	return atomic_add_return(1, &unique_id);
 }
 
-static int alloc_session(struct dbg_session_gk20a **_dbg_s)
+static int alloc_session(struct gk20a *g, struct dbg_session_gk20a **_dbg_s)
 {
 	struct dbg_session_gk20a *dbg_s;
 	*_dbg_s = NULL;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
-	dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL);
+	dbg_s = nvgpu_kzalloc(g, sizeof(*dbg_s));
 	if (!dbg_s)
 		return -ENOMEM;
 
@@ -125,7 +125,7 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name);
 
-	err = alloc_session(&dbg_session);
+	err = alloc_session(g, &dbg_session);
 	if (err)
 		goto free_ref;
 
@@ -443,10 +443,10 @@ int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 	session_data = ch_data->session_data;
 	list_del_init(&session_data->dbg_s_entry);
-	kfree(session_data);
+	nvgpu_kfree(dbg_s->g, session_data);
 
 	fput(ch_data->ch_f);
-	kfree(ch_data);
+	nvgpu_kfree(dbg_s->g, ch_data);
 
 	return 0;
 }
@@ -545,7 +545,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 	nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
 	nvgpu_mutex_destroy(&dbg_s->ioctl_lock);
 
-	kfree(dbg_s);
+	nvgpu_kfree(g, dbg_s);
 	gk20a_put(g);
 
 	return 0;
@@ -582,7 +582,7 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 	nvgpu_mutex_acquire(&ch->dbg_s_lock);
 
-	ch_data = kzalloc(sizeof(*ch_data), GFP_KERNEL);
+	ch_data = nvgpu_kzalloc(g, sizeof(*ch_data));
 	if (!ch_data) {
 		fput(f);
 		return -ENOMEM;
@@ -592,9 +592,9 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 	ch_data->chid = ch->hw_chid;
 	INIT_LIST_HEAD(&ch_data->ch_entry);
 
-	session_data = kzalloc(sizeof(*session_data), GFP_KERNEL);
+	session_data = nvgpu_kzalloc(g, sizeof(*session_data));
 	if (!session_data) {
-		kfree(ch_data);
+		nvgpu_kfree(g, ch_data);
 		fput(f);
 		return -ENOMEM;
 	}
@@ -796,7 +796,7 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
 	if (sm_id >= gr->no_of_sm)
 		return -EINVAL;
 
-	sm_error_state = kzalloc(sizeof(*sm_error_state), GFP_KERNEL);
+	sm_error_state = nvgpu_kzalloc(g, sizeof(*sm_error_state));
 	if (!sm_error_state)
 		return -ENOMEM;
 
@@ -829,7 +829,7 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
 	gk20a_idle(g);
 
 err_free:
-	kfree(sm_error_state);
+	nvgpu_kfree(g, sm_error_state);
 
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index da2421d9..dbf67c71 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -23,6 +23,9 @@
 #include
 #include
 #include
+
+#include
+
 #include "ctxsw_trace_gk20a.h"
 #include "fecs_trace_gk20a.h"
 #include "gk20a.h"
@@ -151,7 +154,7 @@ static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
 		"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);
 
-	he = kzalloc(sizeof(*he), GFP_KERNEL);
+	he = nvgpu_kzalloc(g, sizeof(*he));
 	if (unlikely(!he)) {
 		gk20a_warn(dev_from_gk20a(g),
 			"can't alloc new hash entry for context_ptr=%x pid=%d",
@@ -184,7 +187,7 @@ static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
 			gk20a_dbg(gpu_dbg_ctxsw,
 				"freed hash entry=%p context_ptr=%x", ent, ent->context_ptr);
-			kfree(ent);
+			nvgpu_kfree(g, ent);
 			break;
 		}
 	}
@@ -203,7 +206,7 @@ static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
 	nvgpu_mutex_acquire(&trace->hash_lock);
 	hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
 		hash_del(&ent->node);
-		kfree(ent);
+		nvgpu_kfree(g, ent);
 	}
 	nvgpu_mutex_release(&trace->hash_lock);
 
@@ -566,7 +569,7 @@ static int gk20a_fecs_trace_init(struct gk20a *g)
 	struct gk20a_fecs_trace *trace;
 	int err;
 
-	trace = kzalloc(sizeof(struct gk20a_fecs_trace), GFP_KERNEL);
+	trace = nvgpu_kzalloc(g, sizeof(struct gk20a_fecs_trace));
 	if (!trace) {
 		gk20a_warn(dev_from_gk20a(g), "failed to allocate fecs_trace");
 		return -ENOMEM;
@@ -600,7 +603,7 @@ clean_hash_lock:
 clean_poll_lock:
 	nvgpu_mutex_destroy(&trace->poll_lock);
 clean:
-	kfree(trace);
+	nvgpu_kfree(g, trace);
 	g->fecs_trace = NULL;
 	return err;
 }
@@ -712,7 +715,7 @@ static int gk20a_fecs_trace_deinit(struct gk20a *g)
 	nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
 	nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
 
-	kfree(g->fecs_trace);
+	nvgpu_kfree(g, g->fecs_trace);
 	g->fecs_trace = NULL;
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index ea3cd3ff..63da0959 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -18,6 +18,7 @@
 #include
 #include
+#include
 
 #include "gk20a.h"
 #include "channel_gk20a.h"
@@ -42,6 +43,8 @@ static void gk20a_fence_free(struct kref *ref)
 {
 	struct gk20a_fence *f =
 		container_of(ref, struct gk20a_fence, ref);
+	struct gk20a *g = f->g;
+
 #ifdef CONFIG_SYNC
 	if (f->sync_fence)
 		sync_fence_put(f->sync_fence);
@@ -53,7 +56,7 @@ static void gk20a_fence_free(struct kref *ref)
 		if (nvgpu_alloc_initialized(f->allocator))
 			nvgpu_free(f->allocator, (size_t)f);
 	} else
-		kfree(f);
+		nvgpu_kfree(g, f);
 }
 
 void gk20a_fence_put(struct gk20a_fence *f)
@@ -124,7 +127,7 @@ int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
 	size = sizeof(struct gk20a_fence);
 	if (count <= UINT_MAX / size) {
 		size = count * size;
-		fence_pool = vzalloc(size);
+		fence_pool = nvgpu_vzalloc(c->g, size);
 	}
 
 	if (!fence_pool)
@@ -139,7 +142,7 @@ int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
 	return 0;
 
 fail:
-	vfree(fence_pool);
+	nvgpu_vfree(c->g, fence_pool);
 	return err;
 }
 
@@ -150,7 +153,7 @@ void gk20a_free_fence_pool(struct channel_gk20a *c)
 			nvgpu_alloc_base(&c->fence_allocator);
 
 		nvgpu_alloc_destroy(&c->fence_allocator);
-		vfree(base);
+		nvgpu_vfree(c->g, base);
 	}
 }
 
@@ -171,10 +174,12 @@ struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
 			}
 		}
 	} else
-		fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
+		fence = nvgpu_kzalloc(c->g, sizeof(struct gk20a_fence));
 
-	if (fence)
+	if (fence) {
 		kref_init(&fence->ref);
+		fence->g = c->g;
+	}
 
 	return fence;
 }
@@ -223,6 +228,7 @@ static const struct gk20a_fence_ops nvgpu_semaphore_fence_ops = {
 
 /* This function takes ownership of the semaphore */
 int gk20a_fence_from_semaphore(
+		struct gk20a *g,
 		struct gk20a_fence *fence_out,
 		struct sync_timeline *timeline,
 		struct nvgpu_semaphore *semaphore,
@@ -235,7 +241,7 @@ int gk20a_fence_from_semaphore(
 
 #ifdef CONFIG_SYNC
 	if (need_sync_fence) {
-		sync_fence = gk20a_sync_fence_create(timeline, semaphore,
+		sync_fence = gk20a_sync_fence_create(g, timeline, semaphore,
 				dependency, "f-gk20a-0x%04x",
 				nvgpu_semaphore_gpu_ro_va(semaphore));
 		if (!sync_fence)
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index b4283f58..9d9fde85 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -30,6 +30,8 @@ struct channel_gk20a;
 struct gk20a_fence_ops;
 
 struct gk20a_fence {
+	struct gk20a *g;
+
 	/* Valid for all fence types: */
 	bool valid;
 	struct kref ref;
@@ -52,6 +54,7 @@ struct gk20a_fence {
 
 /* Fences can be created from semaphores or syncpoint (id, value) pairs */
 int gk20a_fence_from_semaphore(
+		struct gk20a *g,
 		struct gk20a_fence *fence_out,
 		struct sync_timeline *timeline,
 		struct nvgpu_semaphore *semaphore,
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
index bae5c2d8..aabe673a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
@@ -26,6 +26,8 @@
 
 #include
 
+#include
+
 #include "gk20a.h"
 #include "pmu_gk20a.h"
 #include "clk_gk20a.h"
@@ -364,7 +366,7 @@ void gk20a_scale_init(struct device *dev)
 	if (!platform->devfreq_governor && !platform->qos_notify)
 		return;
 
-	profile = kzalloc(sizeof(*profile), GFP_KERNEL);
+	profile = nvgpu_kzalloc(g, sizeof(*profile));
 
 	profile->dev = dev;
 	profile->dev_stat.busy = false;
@@ -415,7 +417,7 @@ void gk20a_scale_init(struct device *dev)
 	return;
 
 err_get_freqs:
-	kfree(profile);
+	nvgpu_kfree(g, profile);
 }
 
 void gk20a_scale_exit(struct device *dev)
@@ -436,7 +438,7 @@ void gk20a_scale_exit(struct device *dev)
 		g->devfreq = NULL;
 	}
 
-	kfree(g->scale_profile);
+	nvgpu_kfree(g, g->scale_profile);
 	g->scale_profile = NULL;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
index 2ab15357..4b3bb83c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -29,6 +29,8 @@
 #include
 #endif
 
+#include
+
 #include "gk20a.h"
 #include "gr_gk20a.h"
 #include "fifo_gk20a.h"
@@ -794,7 +796,7 @@ static ssize_t tpc_fs_mask_store(struct device *dev,
 
 	g->ops.gr.set_gpc_tpc_mask(g, 0);
 
-	vfree(g->gr.ctx_vars.local_golden_image);
+	nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image);
 	g->gr.ctx_vars.local_golden_image = NULL;
 	g->gr.ctx_vars.golden_image_initialized = false;
 	g->gr.ctx_vars.golden_image_size = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 85fa8ea1..eda4167b 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -25,6 +25,7 @@
 #include
 #include
+#include
 
 #include "gk20a.h"
 #include "gr_gk20a.h"
@@ -301,7 +302,7 @@ static void printtrace(struct pmu_gk20a *pmu)
 	u32 *trace1;
 
 	/* allocate system memory to copy pmu trace buffer */
-	tracebuffer = kzalloc(GK20A_PMU_TRACE_BUFSIZE, GFP_KERNEL);
+	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
 	if (tracebuffer == NULL)
 		return;
 
@@ -335,7 +336,7 @@ static void printtrace(struct pmu_gk20a *pmu)
 			scnprintf((buf + count), 0x40, "%s", (trace+i+20+m));
 		gk20a_err(dev_from_gk20a(g), "%s", buf);
 	}
-	kfree(tracebuffer);
+	nvgpu_kfree(g, tracebuffer);
 }
 
 static void set_pmu_cmdline_args_falctracedmabase_v1(struct pmu_gk20a *pmu)
@@ -3163,8 +3164,8 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g)
 	pmu->perfmon_sampling_enabled = true;
 
 	pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
-	pmu->mutex = kzalloc(pmu->mutex_cnt *
-		sizeof(struct pmu_mutex), GFP_KERNEL);
+	pmu->mutex = nvgpu_kzalloc(g, pmu->mutex_cnt *
+		sizeof(struct pmu_mutex));
 	if (!pmu->mutex) {
 		err = -ENOMEM;
 		goto err;
@@ -3175,8 +3176,8 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g)
 		pmu->mutex[i].index = i;
 	}
 
-	pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
-		sizeof(struct pmu_sequence), GFP_KERNEL);
+	pmu->seq = nvgpu_kzalloc(g, PMU_MAX_NUM_SEQUENCES *
+		sizeof(struct pmu_sequence));
 	if (!pmu->seq) {
 		err = -ENOMEM;
 		goto err_free_mutex;
@@ -3218,9 +3219,9 @@ skip_init:
 err_free_seq_buf:
 	gk20a_gmmu_unmap_free(vm, &pmu->seq_buf);
 err_free_seq:
-	kfree(pmu->seq);
+	nvgpu_kfree(g, pmu->seq);
 err_free_mutex:
-	kfree(pmu->mutex);
+	nvgpu_kfree(g, pmu->mutex);
 err:
 	gk20a_dbg_fn("fail");
 	return err;
@@ -4060,7 +4061,7 @@ static int pmu_response_handle(struct pmu_gk20a *pmu,
 			gk20a_pmu_surface_free(g, seq->out_mem);
 			if (seq->out_mem != seq->in_mem)
-				kfree(seq->out_mem);
+				nvgpu_kfree(g, seq->out_mem);
 			else
 				seq->out_mem = NULL;
 		}
@@ -4072,7 +4073,7 @@ static int pmu_response_handle(struct pmu_gk20a *pmu,
 			pv->get_pmu_seq_in_a_ptr(seq)));
 		gk20a_pmu_surface_free(g, seq->in_mem);
-		kfree(seq->in_mem);
+		nvgpu_kfree(g, seq->in_mem);
 		seq->in_mem = NULL;
 	}
 
@@ -4822,8 +4823,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
 			goto clean_up;
 
 		if (payload->in.fb_size != 0x0) {
-			seq->in_mem = kzalloc(sizeof(struct mem_desc),
-				GFP_KERNEL);
+			seq->in_mem = nvgpu_kzalloc(g, sizeof(struct mem_desc));
 			if (!seq->in_mem) {
 				err = -ENOMEM;
 				goto clean_up;
@@ -4866,8 +4866,8 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
 			goto clean_up;
 
 		if (payload->out.fb_size != 0x0) {
-			seq->out_mem = kzalloc(sizeof(struct mem_desc),
-				GFP_KERNEL);
+			seq->out_mem = nvgpu_kzalloc(g,
+				sizeof(struct mem_desc));
 			if (!seq->out_mem) {
 				err = -ENOMEM;
 				goto clean_up;
@@ -5690,7 +5690,7 @@ static int falc_trace_show(struct seq_file *s, void *data)
 	u32 *trace1;
 
 	/* allocate system memory to copy pmu trace buffer */
-	tracebuffer = kzalloc(GK20A_PMU_TRACE_BUFSIZE, GFP_KERNEL);
+	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
 	if (tracebuffer == NULL)
 		return -ENOMEM;
 
@@ -5723,7 +5723,7 @@ static int falc_trace_show(struct seq_file *s, void *data)
 			seq_printf(s, "%s", (trace+i+20+m));
 	}
 
-	kfree(tracebuffer);
+	nvgpu_kfree(g, tracebuffer);
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index f57871d5..b6105a40 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -23,6 +23,7 @@
 #include
 #include
+#include
 #include
 
 #include "../drivers/staging/android/sync.h"
@@ -42,6 +43,7 @@ struct gk20a_sync_timeline {
  * refcounted gk20a_sync_pt for each duped pt.
  */
 struct gk20a_sync_pt {
+	struct gk20a *g;
 	struct kref refcount;
 	u32 thresh;
 	struct nvgpu_semaphore *sema;
@@ -203,26 +205,29 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
 {
 	struct gk20a_sync_pt *pt =
 		container_of(ref, struct gk20a_sync_pt, refcount);
+	struct gk20a *g = pt->g;
 
 	if (pt->dep)
 		sync_fence_put(pt->dep);
 	if (pt->sema)
 		nvgpu_semaphore_put(pt->sema);
-	kfree(pt);
+	nvgpu_kfree(g, pt);
 }
 
 static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
+		struct gk20a *g,
 		struct gk20a_sync_timeline *obj,
 		struct nvgpu_semaphore *sema,
 		struct sync_fence *dependency)
 {
 	struct gk20a_sync_pt *shared;
 
-	shared = kzalloc(sizeof(*shared), GFP_KERNEL);
+	shared = nvgpu_kzalloc(g, sizeof(*shared));
 	if (!shared)
 		return NULL;
 
 	kref_init(&shared->refcount);
+	shared->g = g;
 	shared->obj = obj;
 	shared->sema = sema;
 	shared->thresh = ++obj->max; /* sync framework has a lock */
@@ -249,6 +254,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
 }
 
 static struct sync_pt *gk20a_sync_pt_create_inst(
+		struct gk20a *g,
 		struct gk20a_sync_timeline *obj,
 		struct nvgpu_semaphore *sema,
 		struct sync_fence *dependency)
@@ -260,7 +266,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
 	if (!pti)
 		return NULL;
 
-	pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency);
+	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema, dependency);
 	if (!pti->shared) {
 		sync_pt_free(&pti->pt);
 		return NULL;
@@ -506,7 +512,9 @@ struct sync_timeline *gk20a_sync_timeline_create(
 	return &obj->obj;
 }
 
-struct sync_fence *gk20a_sync_fence_create(struct sync_timeline *obj,
+struct sync_fence *gk20a_sync_fence_create(
+		struct gk20a *g,
+		struct sync_timeline *obj,
 		struct nvgpu_semaphore *sema,
 		struct sync_fence *dependency,
 		const char *fmt, ...)
@@ -517,7 +525,7 @@ struct sync_fence *gk20a_sync_fence_create(struct sync_timeline *obj,
 	struct sync_fence *fence;
 	struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj);
 
-	pt = gk20a_sync_pt_create_inst(timeline, sema, dependency);
+	pt = gk20a_sync_pt_create_inst(g, timeline, sema, dependency);
 	if (pt == NULL)
 		return NULL;
 
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index dcced5c8..5dcdfe15 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -34,7 +34,9 @@ struct nvgpu_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f);
 struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...);
 void gk20a_sync_timeline_destroy(struct sync_timeline *);
 void gk20a_sync_timeline_signal(struct sync_timeline *);
-struct sync_fence *gk20a_sync_fence_create(struct sync_timeline *,
+struct sync_fence *gk20a_sync_fence_create(
+		struct gk20a *g,
+		struct sync_timeline *,
 		struct nvgpu_semaphore *,
 		struct sync_fence *dependency,
 		const char *fmt, ...);
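The hunks above all apply the same basic conversion, so a minimal sketch of
the pattern may help when skimming. This is illustrative only and not part of
the patch: "my_obj" is a placeholder type, and the nvgpu_kzalloc()/nvgpu_kfree()
signatures are assumed from the call sites in the diff, which now pass the
owning struct gk20a so allocations can be accounted per GPU instance.

	/*
	 * Illustrative sketch (not from the patch). Assumes the nvgpu kmem
	 * declarations are pulled in by the includes the patch adds.
	 */
	struct my_obj {
		int payload;
	};

	/* Before: plain kernel allocators, no GPU context involved. */
	static struct my_obj *my_obj_create_old(void)
	{
		return kzalloc(sizeof(struct my_obj), GFP_KERNEL);
	}

	static void my_obj_destroy_old(struct my_obj *obj)
	{
		kfree(obj);
	}

	/* After: nvgpu kmem wrappers keyed by the owning struct gk20a. */
	static struct my_obj *my_obj_create_new(struct gk20a *g)
	{
		return nvgpu_kzalloc(g, sizeof(struct my_obj));
	}

	static void my_obj_destroy_new(struct gk20a *g, struct my_obj *obj)
	{
		nvgpu_kfree(g, obj);
	}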