From 707ea45e0f1d7a07885597777496b186dd5fb6f0 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Wed, 11 Jan 2017 15:00:54 -0800
Subject: gpu: nvgpu: kmem abstraction and tracking

Implement kmem abstraction and tracking in nvgpu. The abstraction
helps move nvgpu's core code away from being Linux-dependent and
allows kmem allocation tracking to be done for Linux and any other
OS supported by nvgpu.

Bug 1799159
Bug 1823380

Change-Id: Ieaae4ca1bbd1d4db4a1546616ab8b9fc53a4079d
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1283828
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 12 ++++++------
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |  4 ++--
 drivers/gpu/nvgpu/gk20a/debug_gk20a.c   |  4 ++++
 drivers/gpu/nvgpu/gk20a/gk20a.c         |  3 +++
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  7 +++++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      |  6 +++---
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c      |  6 +++---
 7 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f228110e..68e43259 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -986,7 +986,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
 
 	gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
-	nvgpu_big_free(ch->gpfifo.pipe);
+	nvgpu_big_free(g, ch->gpfifo.pipe);
 	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1856,7 +1856,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	}
 
 	if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-		c->gpfifo.pipe = nvgpu_big_malloc(
+		c->gpfifo.pipe = nvgpu_big_malloc(g,
 				gpfifo_size * sizeof(struct nvgpu_gpfifo));
 		if (!c->gpfifo.pipe) {
 			err = -ENOMEM;
@@ -1927,7 +1927,7 @@ clean_up_sync:
 		c->sync = NULL;
 	}
 clean_up_unmap:
-	nvgpu_big_free(c->gpfifo.pipe);
+	nvgpu_big_free(g, c->gpfifo.pipe);
 	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
 clean_up:
 	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -2057,12 +2057,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	if (!g) {
 		size = count * sizeof(struct nvgpu_gpfifo);
 		if (size) {
-			g = nvgpu_big_malloc(size);
+			g = nvgpu_big_malloc(c->g, size);
 			if (!g)
 				return;
 
 			if (copy_from_user(g, user_gpfifo, size)) {
-				nvgpu_big_free(g);
+				nvgpu_big_free(c->g, g);
 				return;
 			}
 		}
@@ -2074,7 +2074,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 		trace_write_pushbuffer(c, gp);
 
 	if (gpfifo_allocated)
-		nvgpu_big_free(g);
+		nvgpu_big_free(c->g, g);
 }
 
 static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 4a42e03f..0a0aada7 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -819,7 +819,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 		goto fail_dmabuf_put;
 	}
 
-	buffer = nvgpu_big_zalloc(access_limit_size);
+	buffer = nvgpu_big_zalloc(g, access_limit_size);
 	if (!buffer) {
 		err = -ENOMEM;
 		goto fail_dmabuf_put;
@@ -865,7 +865,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 fail_idle:
 	gk20a_idle(g->dev);
 fail_free_buffer:
-	nvgpu_big_free(buffer);
+	nvgpu_big_free(g, buffer);
 fail_dmabuf_put:
 	dma_buf_put(dmabuf);
 
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 67f9b532..6341a962 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 
 #include <nvgpu/semaphore.h>
+#include <nvgpu/kmem.h>
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
@@ -485,6 +486,9 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
 	gk20a_mm_debugfs_init(g->dev);
 	gk20a_fifo_debugfs_init(g->dev);
 	gk20a_sched_debugfs_init(g->dev);
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+	nvgpu_kmem_debugfs_init(g->dev);
+#endif
 #endif
 
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 3504a32f..6b026ee2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -43,6 +43,7 @@
 #include <linux/version.h>
 
 #include <nvgpu/nvgpu_common.h>
+#include <nvgpu/kmem.h>
 #include <nvgpu/allocator.h>
 #include <nvgpu/timers.h>
 
@@ -1598,6 +1599,8 @@ static int gk20a_probe(struct platform_device *dev)
 	set_gk20a(dev, gk20a);
 	gk20a->dev = &dev->dev;
 
+	nvgpu_kmem_init(gk20a);
+
 	gk20a->irq_stall = platform_get_irq(dev, 0);
 	gk20a->irq_nonstall = platform_get_irq(dev, 1);
 	if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8006a4fe..69528c1f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -27,6 +27,7 @@ struct gk20a_ctxsw_ucode_segments;
 struct gk20a_fecs_trace;
 struct gk20a_ctxsw_trace;
 struct acr_desc;
+struct nvgpu_mem_alloc_tracker;
 
 #include <linux/sched.h>
 #include <nvgpu/lock.h>
@@ -915,6 +916,7 @@ struct gk20a {
 	struct dentry *debugfs_runlist_interleave;
 	struct dentry *debugfs_allocators;
 	struct dentry *debugfs_xve;
+	struct dentry *debugfs_kmem;
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
 
@@ -1055,6 +1057,10 @@ struct gk20a {
 	/* Check if msi is enabled */
 	bool msi_enabled;
 #endif
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+	struct nvgpu_mem_alloc_tracker *vmallocs;
+	struct nvgpu_mem_alloc_tracker *kmallocs;
+#endif
 };
 
 static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
@@ -1131,6 +1137,7 @@ enum gk20a_dbg_categories {
 	gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */
 	gpu_dbg_xv      = BIT(18), /* XVE debugging */
 	gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */
+	gpu_dbg_kmem    = BIT(20), /* Kmem tracking debugging */
 	gpu_dbg_mem     = BIT(31), /* memory accesses, very verbose */
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 36b85f3b..e695f02e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3424,7 +3424,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 	gr->ctx_vars.local_golden_image = NULL;
 
 	if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
-		nvgpu_big_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
+		nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
 	gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
 
 	gk20a_comptag_allocator_destroy(&gr->comp_tags);
@@ -8055,7 +8055,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 	hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
 	map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
 
-	map = nvgpu_big_zalloc(map_size);
+	map = nvgpu_big_zalloc(g, map_size);
 	if (!map)
 		return -ENOMEM;
 
@@ -8145,7 +8145,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 	return 0;
 cleanup:
 	gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
-	nvgpu_big_free(map);
+	nvgpu_big_free(g, map);
 	return -EINVAL;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 7a64f79b..2ff54653 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1487,8 +1487,8 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	buffer_list = nvgpu_big_zalloc(sizeof(*buffer_list) *
-					  vm->num_user_mapped_buffers);
+	buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
+				       vm->num_user_mapped_buffers);
 	if (!buffer_list) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
 		return -ENOMEM;
@@ -1572,7 +1572,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
 	gk20a_vm_mapping_batch_finish_locked(vm, &batch);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 
-	nvgpu_big_free(mapped_buffers);
+	nvgpu_big_free(vm->mm->g, mapped_buffers);
 }
 
 static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
-- 
cgit v1.2.2