From 861b11a968b1f51f45832486e62bfe23fc29fc19 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 23 Nov 2017 01:03:24 -0800
Subject: gpu: nvgpu: move snapshot_client memory handling to linux

We currently store the dmabuf fd and the dma_buf pointer in struct
gk20a_cs_snapshot_client. Since dma_buf and all related APIs are Linux
specific, we need to remove them from common code and move them to
Linux-specific code.

Add a new Linux-specific structure, gk20a_cs_snapshot_client_linux, which
embeds struct gk20a_cs_snapshot_client and holds the Linux-specific dmabuf fd
and dma_buf pointer.

In gk20a_attach_cycle_stats_snapshot(), we first handle all dma_buf related
operations and then call gr_gk20a_css_attach()

Move gk20a_channel_free_cycle_stats_snapshot() to ioctl_channel.c
In gk20a_channel_free_cycle_stats_snapshot(), we call gr_gk20a_css_detach()
and then free up dma_buf in linux specific code

We also need to call gk20a_channel_free_cycle_stats_snapshot() while closing
the channel, so call it from linux specific nvgpu_channel_close_linux()

Jira NVGPU-397
Jira NVGPU-415

Change-Id: Ida27240541f6adf31f28d7d7ee4f51651c6d3de2
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1603908
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/linux/channel.c       |  5 ++
 drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 95 +++++++++++++++++++++++---
 drivers/gpu/nvgpu/common/linux/ioctl_channel.h | 11 +++
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c        | 17 -----
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h        |  1 -
 drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c         | 62 +++--------------
 drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h         |  6 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h             |  5 +-
 8 files changed, 117 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index 1ae2d444..0ed596ac 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -29,6 +29,7 @@
 #include "gk20a/gk20a.h"
 
 #include "channel.h"
+#include "ioctl_channel.h"
 #include "os_linux.h"
 
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -242,6 +243,10 @@ static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
 static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
 {
 	nvgpu_channel_work_completion_clear(ch);
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	gk20a_channel_free_cycle_stats_snapshot(ch);
+#endif
 }
 
 static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 67bec31b..13355605 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -42,6 +42,11 @@
 #include "os_linux.h"
 #include "ctxsw_trace.h"
 
+/* the minimal size of client buffer */
+#define CSS_MIN_CLIENT_SNAPSHOT_SIZE				\
+		(sizeof(struct gk20a_cs_snapshot_fifo) +	\
+		sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
+
 static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
 {
 	switch (graphics_preempt_mode) {
@@ -157,18 +162,103 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
 				u32 perfmon_id_count,
 				u32 *perfmon_id_start)
 {
-	int ret;
+	int ret = 0;
+	struct gk20a *g = ch->g;
+	struct gk20a_cs_snapshot_client_linux *client_linux;
+	struct gk20a_cs_snapshot_client *client;
 
 	nvgpu_mutex_acquire(&ch->cs_client_mutex);
 	if (ch->cs_client) {
-		ret = -EEXIST;
-	} else {
-		ret = gr_gk20a_css_attach(ch,
-					dmabuf_fd,
-					perfmon_id_count,
-					perfmon_id_start,
-					&ch->cs_client);
+		nvgpu_mutex_release(&ch->cs_client_mutex);
+		return -EEXIST;
+	}
+
+	client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
+	if (!client_linux) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* take a reference on the dma_buf; dropped on detach or on error */
+	client_linux->dmabuf_fd   = dmabuf_fd;
+	client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
+	if (IS_ERR(client_linux->dma_handler)) {
+		ret = PTR_ERR(client_linux->dma_handler);
+		client_linux->dma_handler = NULL;
+		goto err_free;
+	}
+
+	client = &client_linux->cs_client;
+	client->snapshot_size = client_linux->dma_handler->size;
+	if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
+		ret = -ENOMEM;
+		goto err_put;
+	}
+
+	client->snapshot = (struct gk20a_cs_snapshot_fifo *)
+					dma_buf_vmap(client_linux->dma_handler);
+	if (!client->snapshot) {
+		ret = -ENOMEM;
+		goto err_put;
+	}
+
+	ch->cs_client = client;
+
+	ret = gr_gk20a_css_attach(ch,
+				perfmon_id_count,
+				perfmon_id_start,
+				ch->cs_client);
+	if (ret)
+		goto err_unmap;
+
+	nvgpu_mutex_release(&ch->cs_client_mutex);
+
+	return 0;
+
+err_unmap:
+	/*
+	 * The common attach code no longer owns the client memory, so a
+	 * failed attach must be unwound here; otherwise ch->cs_client stays
+	 * set and later attach attempts on this channel fail with -EEXIST.
+	 */
+	ch->cs_client = NULL;
+	dma_buf_vunmap(client_linux->dma_handler, client->snapshot);
+err_put:
+	dma_buf_put(client_linux->dma_handler);
+err_free:
+	nvgpu_kfree(g, client_linux);
+err:
+	nvgpu_mutex_release(&ch->cs_client_mutex);
+	return ret;
+}
+
+int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
+{
+	int ret;
+	struct gk20a_cs_snapshot_client_linux *client_linux;
+
+	nvgpu_mutex_acquire(&ch->cs_client_mutex);
+	if (!ch->cs_client) {
+		nvgpu_mutex_release(&ch->cs_client_mutex);
+		return 0;
 	}
+
+	client_linux = container_of(ch->cs_client,
+				struct gk20a_cs_snapshot_client_linux,
+				cs_client);
+
+	ret = gr_gk20a_css_detach(ch, ch->cs_client);
+
+	if (client_linux->dma_handler) {
+		if (ch->cs_client->snapshot)
+			dma_buf_vunmap(client_linux->dma_handler,
+					ch->cs_client->snapshot);
+		dma_buf_put(client_linux->dma_handler);
+	}
+
+	ch->cs_client = NULL;
+	nvgpu_kfree(ch->g, client_linux);
+
 	nvgpu_mutex_release(&ch->cs_client_mutex);
 
 	return ret;
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
index 235d84ef..3ea8d765 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
@@ -15,11 +15,20 @@
 
 #include <linux/fs.h>
 
+#include "gk20a/css_gr_gk20a.h"
+
 struct inode;
 struct file;
 struct gk20a;
 struct nvgpu_channel_open_args;
 
+struct gk20a_cs_snapshot_client_linux {
+	struct gk20a_cs_snapshot_client cs_client;
+
+	u32			dmabuf_fd;
+	struct dma_buf		*dma_handler;
+};
+
 int gk20a_channel_open(struct inode *inode, struct file *filp);
 int gk20a_channel_release(struct inode *inode, struct file *filp);
 long gk20a_channel_ioctl(struct file *filp,
@@ -27,6 +36,8 @@ long gk20a_channel_ioctl(struct file *filp,
 int gk20a_channel_open_ioctl(struct gk20a *g,
 		struct nvgpu_channel_open_args *args);
 
+int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
+
 extern const struct file_operations gk20a_event_id_ops;
 extern const struct file_operations gk20a_channel_ops;
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a0415861..dac38739 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -376,22 +376,6 @@ void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
 	nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
 }
 
-int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
-{
-	int ret;
-
-	nvgpu_mutex_acquire(&ch->cs_client_mutex);
-	if (ch->cs_client) {
-		ret = gr_gk20a_css_detach(ch, ch->cs_client);
-		ch->cs_client = NULL;
-	} else {
-		ret = 0;
-	}
-	nvgpu_mutex_release(&ch->cs_client_mutex);
-
-	return ret;
-}
-
 #endif
 
 /* call ONLY when no references to the channel exist: after the last put */
@@ -508,7 +492,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	gk20a_channel_free_cycle_stats_buffer(ch);
-	gk20a_channel_free_cycle_stats_snapshot(ch);
 #endif
 
 	channel_gk20a_free_priv_cmdbuf(ch);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index ff96d0d7..87ab6202 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -371,7 +371,6 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
 		unsigned int num_inflight_jobs,
 		u32 flags);
 void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);
-int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
 
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
 
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index e3896981..afba2496 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -45,11 +45,6 @@
 		((cl)->perfmon_start <= (pm) &&			\
 		((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
 
-/* the minimal size of client buffer */
-#define CSS_MIN_CLIENT_SNAPSHOT_SIZE				\
-		(sizeof(struct gk20a_cs_snapshot_fifo) +	\
-		sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
-
 /* address of fifo entry by offset */
 #define CSS_FIFO_ENTRY(fifo, offs)				\
 	((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))
@@ -452,52 +447,16 @@ static int css_gr_free_client_data(struct gk20a *g,
 			ret = -EINVAL;
 	}
 
-	if (client->dma_handler) {
-		if (client->snapshot)
-			dma_buf_vunmap(client->dma_handler, client->snapshot);
-		dma_buf_put(client->dma_handler);
-	}
-
-	nvgpu_kfree(g, client);
-
 	return ret;
 }
 
 static int css_gr_create_client_data(struct gk20a *g,
 			struct gk20a_cs_snapshot *data,
-			u32 dmabuf_fd, u32 perfmon_count,
-			struct gk20a_cs_snapshot_client **client)
+			u32 perfmon_count,
+			struct gk20a_cs_snapshot_client *cur)
 {
-	struct gk20a_cs_snapshot_client *cur;
 	int ret = 0;
 
-	cur = nvgpu_kzalloc(g, sizeof(*cur));
-	if (!cur) {
-		ret = -ENOMEM;
-		goto failed;
-	}
-
-	cur->dmabuf_fd   = dmabuf_fd;
-	cur->dma_handler = dma_buf_get(cur->dmabuf_fd);
-	if (IS_ERR(cur->dma_handler)) {
-		ret = PTR_ERR(cur->dma_handler);
-		cur->dma_handler = NULL;
-		goto failed;
-	}
-
-	cur->snapshot = (struct gk20a_cs_snapshot_fifo *)
-					dma_buf_vmap(cur->dma_handler);
-	if (!cur->snapshot) {
-		ret = -ENOMEM;
-		goto failed;
-	}
-
-	cur->snapshot_size = cur->dma_handler->size;
-	if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
-		ret = -ENOMEM;
-		goto failed;
-	}
-
 	memset(cur->snapshot, 0, sizeof(*cur->snapshot));
 	cur->snapshot->start = sizeof(*cur->snapshot);
 	/* we should be ensure that can fit all fifo entries here */
@@ -523,12 +482,10 @@ static int css_gr_create_client_data(struct gk20a *g,
 	}
 
 	nvgpu_list_add_tail(&cur->list, &data->clients);
-	*client = cur;
 
 	return 0;
 
 failed:
-	*client = NULL;
 	if (cur)
 		css_gr_free_client_data(g, data, cur);
 
@@ -537,10 +494,9 @@ failed:
 
 
 int gr_gk20a_css_attach(struct channel_gk20a *ch,
-			u32 dmabuf_fd,
 			u32 perfmon_count,
 			u32 *perfmon_start,
-			struct gk20a_cs_snapshot_client **cs_client)
+			struct gk20a_cs_snapshot_client *cs_client)
 {
 	int ret = 0;
 	struct gk20a *g = ch->g;
@@ -555,7 +511,6 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
 		return -EINVAL;
 
 	gr = &g->gr;
-	*cs_client = NULL;
 
 	nvgpu_mutex_acquire(&gr->cs_lock);
 
@@ -564,18 +519,17 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
 		goto failed;
 
 	ret = css_gr_create_client_data(g, gr->cs_data,
-				     dmabuf_fd,
 				     perfmon_count,
 				     cs_client);
 	if (ret)
 		goto failed;
 
-	ret = g->ops.css.enable_snapshot(ch, *cs_client);
+	ret = g->ops.css.enable_snapshot(ch, cs_client);
 	if (ret)
 		goto failed;
 
 	if (perfmon_start)
-		*perfmon_start = (*cs_client)->perfmon_start;
+		*perfmon_start = cs_client->perfmon_start;
 
 	nvgpu_mutex_release(&gr->cs_lock);
 
@@ -583,9 +537,9 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
 
 failed:
 	if (gr->cs_data) {
-		if (*cs_client) {
-			css_gr_free_client_data(g, gr->cs_data, *cs_client);
-			*cs_client = NULL;
+		if (cs_client) {
+			css_gr_free_client_data(g, gr->cs_data, cs_client);
+			cs_client = NULL;
 		}
 
 		if (nvgpu_list_empty(&gr->cs_data->clients))
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
index f0ad6044..b6ad9fac 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
@@ -28,6 +28,10 @@
 /* the minimal size of HW buffer - should be enough to avoid HW overflows */
 #define CSS_MIN_HW_SNAPSHOT_SIZE	(8 * 1024 * 1024)
 
+struct gk20a;
+struct gr_gk20a;
+struct channel_gk20a;
+
 /* cycle stats fifo header (must match NvSnapshotBufferFifo) */
 struct gk20a_cs_snapshot_fifo {
 	/* layout description of the buffer */
@@ -95,8 +99,6 @@ struct gk20a_cs_snapshot_fifo_entry {
 /* cycle stats snapshot client data (e.g. associated with channel) */
 struct gk20a_cs_snapshot_client {
 	struct nvgpu_list_node	list;
-	u32			dmabuf_fd;
-	struct dma_buf		*dma_handler;
 	struct gk20a_cs_snapshot_fifo	*snapshot;
 	u32			snapshot_size;
 	u32			perfmon_start;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 5a5809fc..14668dc6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -699,11 +699,10 @@ int gr_gk20a_halt_pipe(struct gk20a *g);
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 int gr_gk20a_css_attach(struct channel_gk20a *ch,   /* in - main hw structure */
-			u32 dmabuf_fd,		    /* in - dma mapped memory */
 			u32 perfmon_id_count,	    /* in - number of perfmons*/
 			u32 *perfmon_id_start,	    /* out- index of first pm */
-			/* out - pointer to client data used in later     */
-			struct gk20a_cs_snapshot_client **css_client);
+			/* in/out - pointer to client data used in later calls */
+			struct gk20a_cs_snapshot_client *css_client);
 
 int gr_gk20a_css_detach(struct channel_gk20a *ch,
 				struct gk20a_cs_snapshot_client *css_client);
-- 
cgit v1.2.2