gpu: nvgpu: move snapshot_client memory handling to linux

We currently store the dmabuf fd and the dma_buf pointer in gk20a_cs_snapshot_client. But since dma_buf and all related APIs are Linux specific, we need to remove them from common code and move them to Linux-specific code. Add a new Linux-specific structure, gk20a_cs_snapshot_client_linux, which includes struct gk20a_cs_snapshot_client and the Linux-specific dma_buf pointer. In gk20a_attach_cycle_stats_snapshot(), we first handle all dma_buf related operations and then call gr_gk20a_css_attach(). Move gk20a_channel_free_cycle_stats_snapshot() to ioctl_channel.c. In gk20a_channel_free_cycle_stats_snapshot(), we call gr_gk20a_css_detach() and then free up the dma_buf in Linux-specific code. We also need to call gk20a_channel_free_cycle_stats_snapshot() while closing the channel, so call it from the Linux-specific nvgpu_channel_close_linux(). Jira NVGPU-397 Jira NVGPU-415 Change-Id: Ida27240541f6adf31f28d7d7ee4f51651c6d3de2 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1603908 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Deepak Nibade <dnibade@nvidia.com> 2017-11-23 04:03:24 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-11-28 12:46:50 -0500
commit: 861b11a968b1f51f45832486e62bfe23fc29fc19 (patch)
tree: 3ec0870177b4ce66f151b916661df483d6b2847b
parent: 3fbb44d7576238d42635e2ca6501a17cdc7306f7 (diff)
8 files changed, 117 insertions, 85 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index 1ae2d444..0ed596ac 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -29,6 +29,7 @@
 #include "gk20a/gk20a.h"
 #include "channel.h"
+#include "ioctl_channel.h"
 #include "os_linux.h"
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -242,6 +243,10 @@ static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
 static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
 {
        nvgpu_channel_work_completion_clear(ch);
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+        gk20a_channel_free_cycle_stats_snapshot(ch);
+#endif
 }
 static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 67bec31b..13355605 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -42,6 +42,11 @@
 #include "os_linux.h"
 #include "ctxsw_trace.h"
+/* the minimal size of client buffer */
+#define CSS_MIN_CLIENT_SNAPSHOT_SIZE                            \
+                (sizeof(struct gk20a_cs_snapshot_fifo) +        \
+                sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
 static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
 {
        switch (graphics_preempt_mode) {
@@ -157,18 +162,92 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
                                u32 perfmon_id_count,
                                u32 *perfmon_id_start)
 {
-        int ret;
+        int ret = 0;
+        struct gk20a *g = ch->g;
+        struct gk20a_cs_snapshot_client_linux *client_linux;
+        struct gk20a_cs_snapshot_client *client;
        nvgpu_mutex_acquire(&ch->cs_client_mutex);
        if (ch->cs_client) {
-                ret = -EEXIST;
+                nvgpu_mutex_release(&ch->cs_client_mutex);
-        } else {
+                return -EEXIST;
-                ret = gr_gk20a_css_attach(ch,
+        }
-                                        dmabuf_fd,
-                                        perfmon_id_count,
+        client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
-                                        perfmon_id_start,
+        if (!client_linux) {
-                                        &ch->cs_client);
+                ret = -ENOMEM;
+                goto err;
+        }
+        client_linux->dmabuf_fd   = dmabuf_fd;
+        client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
+        if (IS_ERR(client_linux->dma_handler)) {
+                ret = PTR_ERR(client_linux->dma_handler);
+                client_linux->dma_handler = NULL;
+                goto err_free;
+        }
+        client = &client_linux->cs_client;
+        client->snapshot_size = client_linux->dma_handler->size;
+        if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
+                ret = -ENOMEM;
+                goto err_put;
+        }
+        client->snapshot = (struct gk20a_cs_snapshot_fifo *)
+                                        dma_buf_vmap(client_linux->dma_handler);
+        if (!client->snapshot) {
+                ret = -ENOMEM;
+                goto err_put;
+        }
+        ch->cs_client = client;
+        ret = gr_gk20a_css_attach(ch,
+                                perfmon_id_count,
+                                perfmon_id_start,
+                                ch->cs_client);
+        nvgpu_mutex_release(&ch->cs_client_mutex);
+        return ret;
+err_put:
+        dma_buf_put(client_linux->dma_handler);
+err_free:
+        nvgpu_kfree(g, client_linux);
+err:
+        nvgpu_mutex_release(&ch->cs_client_mutex);
+        return ret;
+}
+int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
+{
+        int ret;
+        struct gk20a_cs_snapshot_client_linux *client_linux;
+        nvgpu_mutex_acquire(&ch->cs_client_mutex);
+        if (!ch->cs_client) {
+                nvgpu_mutex_release(&ch->cs_client_mutex);
+                return 0;
        }
+        client_linux = container_of(ch->cs_client,
+                                struct gk20a_cs_snapshot_client_linux,
+                                cs_client);
+        ret = gr_gk20a_css_detach(ch, ch->cs_client);
+        if (client_linux->dma_handler) {
+                if (ch->cs_client->snapshot)
+                        dma_buf_vunmap(client_linux->dma_handler,
+                                        ch->cs_client->snapshot);
+                dma_buf_put(client_linux->dma_handler);
+        }
+        ch->cs_client = NULL;
+        nvgpu_kfree(ch->g, client_linux);
        nvgpu_mutex_release(&ch->cs_client_mutex);
        return ret;
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
index 235d84ef..3ea8d765 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
@@ -15,11 +15,20 @@
 #include <linux/fs.h>
+#include "gk20a/css_gr_gk20a.h"
 struct inode;
 struct file;
 struct gk20a;
 struct nvgpu_channel_open_args;
+struct gk20a_cs_snapshot_client_linux {
+        struct gk20a_cs_snapshot_client cs_client;
+        u32                     dmabuf_fd;
+        struct dma_buf          *dma_handler;
+};
 int gk20a_channel_open(struct inode *inode, struct file *filp);
 int gk20a_channel_release(struct inode *inode, struct file *filp);
 long gk20a_channel_ioctl(struct file *filp,
@@ -27,6 +36,8 @@ long gk20a_channel_ioctl(struct file *filp,
 int gk20a_channel_open_ioctl(struct gk20a *g,
                struct nvgpu_channel_open_args *args);
+int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
 extern const struct file_operations gk20a_event_id_ops;
 extern const struct file_operations gk20a_channel_ops;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a0415861..dac38739 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -376,22 +376,6 @@ void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
        nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
 }
-int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
-{
-        int ret;
-        nvgpu_mutex_acquire(&ch->cs_client_mutex);
-        if (ch->cs_client) {
-                ret = gr_gk20a_css_detach(ch, ch->cs_client);
-                ch->cs_client = NULL;
-        } else {
-                ret = 0;
-        }
-        nvgpu_mutex_release(&ch->cs_client_mutex);
-        return ret;
-}
 #endif
 /* call ONLY when no references to the channel exist: after the last put */
@@ -508,7 +492,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 #if defined(CONFIG_GK20A_CYCLE_STATS)
        gk20a_channel_free_cycle_stats_buffer(ch);
-        gk20a_channel_free_cycle_stats_snapshot(ch);
 #endif
        channel_gk20a_free_priv_cmdbuf(ch);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index ff96d0d7..87ab6202 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -371,7 +371,6 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
                unsigned int num_inflight_jobs,
                u32 flags);
 void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);
-int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index e3896981..afba2496 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -45,11 +45,6 @@
                ((cl)->perfmon_start <= (pm) &&                 \
                ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
-/* the minimal size of client buffer */
-#define CSS_MIN_CLIENT_SNAPSHOT_SIZE                            \
-                (sizeof(struct gk20a_cs_snapshot_fifo) +        \
-                sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
 /* address of fifo entry by offset */
 #define CSS_FIFO_ENTRY(fifo, offs)                              \
        ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))
@@ -452,52 +447,16 @@ static int css_gr_free_client_data(struct gk20a *g,
                        ret = -EINVAL;
        }
-        if (client->dma_handler) {
-                if (client->snapshot)
-                        dma_buf_vunmap(client->dma_handler, client->snapshot);
-                dma_buf_put(client->dma_handler);
-        }
-        nvgpu_kfree(g, client);
        return ret;
 }
 static int css_gr_create_client_data(struct gk20a *g,
                        struct gk20a_cs_snapshot *data,
-                        u32 dmabuf_fd, u32 perfmon_count,
+                        u32 perfmon_count,
-                        struct gk20a_cs_snapshot_client **client)
+                        struct gk20a_cs_snapshot_client *cur)
 {
-        struct gk20a_cs_snapshot_client *cur;
        int ret = 0;
-        cur = nvgpu_kzalloc(g, sizeof(*cur));
-        if (!cur) {
-                ret = -ENOMEM;
-                goto failed;
-        }
-        cur->dmabuf_fd   = dmabuf_fd;
-        cur->dma_handler = dma_buf_get(cur->dmabuf_fd);
-        if (IS_ERR(cur->dma_handler)) {
-                ret = PTR_ERR(cur->dma_handler);
-                cur->dma_handler = NULL;
-                goto failed;
-        }
-        cur->snapshot = (struct gk20a_cs_snapshot_fifo *)
-                                        dma_buf_vmap(cur->dma_handler);
-        if (!cur->snapshot) {
-                ret = -ENOMEM;
-                goto failed;
-        }
-        cur->snapshot_size = cur->dma_handler->size;
-        if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
-                ret = -ENOMEM;
-                goto failed;
-        }
        memset(cur->snapshot, 0, sizeof(*cur->snapshot));
        cur->snapshot->start = sizeof(*cur->snapshot);
        /* we should be ensure that can fit all fifo entries here */
@@ -523,12 +482,10 @@ static int css_gr_create_client_data(struct gk20a *g,
        }
        nvgpu_list_add_tail(&cur->list, &data->clients);
-        *client = cur;
        return 0;
 failed:
-        *client = NULL;
        if (cur)
                css_gr_free_client_data(g, data, cur);
@@ -537,10 +494,9 @@ failed:
 int gr_gk20a_css_attach(struct channel_gk20a *ch,
-                        u32 dmabuf_fd,
                        u32 perfmon_count,
                        u32 *perfmon_start,
-                        struct gk20a_cs_snapshot_client **cs_client)
+                        struct gk20a_cs_snapshot_client *cs_client)
 {
        int ret = 0;
        struct gk20a *g = ch->g;
@@ -555,7 +511,6 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
                return -EINVAL;
        gr = &g->gr;
-        *cs_client = NULL;
        nvgpu_mutex_acquire(&gr->cs_lock);
@@ -564,18 +519,17 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
                goto failed;
        ret = css_gr_create_client_data(g, gr->cs_data,
-                                     dmabuf_fd,
                                     perfmon_count,
                                     cs_client);
        if (ret)
                goto failed;
-        ret = g->ops.css.enable_snapshot(ch, *cs_client);
+        ret = g->ops.css.enable_snapshot(ch, cs_client);
        if (ret)
                goto failed;
        if (perfmon_start)
-                *perfmon_start = (*cs_client)->perfmon_start;
+                *perfmon_start = cs_client->perfmon_start;
        nvgpu_mutex_release(&gr->cs_lock);
@@ -583,9 +537,9 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
 failed:
        if (gr->cs_data) {
-                if (*cs_client) {
+                if (cs_client) {
-                        css_gr_free_client_data(g, gr->cs_data, *cs_client);
+                        css_gr_free_client_data(g, gr->cs_data, cs_client);
-                        *cs_client = NULL;
+                        cs_client = NULL;
                }
                if (nvgpu_list_empty(&gr->cs_data->clients))
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
index f0ad6044..b6ad9fac 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
@@ -28,6 +28,10 @@
 /* the minimal size of HW buffer - should be enough to avoid HW overflows */
 #define CSS_MIN_HW_SNAPSHOT_SIZE        (8 * 1024 * 1024)
+struct gk20a;
+struct gr_gk20a;
+struct channel_gk20a;
 /* cycle stats fifo header (must match NvSnapshotBufferFifo) */
 struct gk20a_cs_snapshot_fifo {
        /* layout description of the buffer */
@@ -95,8 +99,6 @@ struct gk20a_cs_snapshot_fifo_entry {
 /* cycle stats snapshot client data (e.g. associated with channel) */
 struct gk20a_cs_snapshot_client {
        struct nvgpu_list_node  list;
-        u32                     dmabuf_fd;
-        struct dma_buf          *dma_handler;
        struct gk20a_cs_snapshot_fifo   *snapshot;
        u32                     snapshot_size;
        u32                     perfmon_start;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 5a5809fc..14668dc6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -699,11 +699,10 @@ int gr_gk20a_halt_pipe(struct gk20a *g);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 int gr_gk20a_css_attach(struct channel_gk20a *ch,   /* in - main hw structure */
-                        u32 dmabuf_fd,              /* in - dma mapped memory */
                        u32 perfmon_id_count,       /* in - number of perfmons*/
                        u32 *perfmon_id_start,      /* out- index of first pm */
-                        /* out - pointer to client data used in later     */
+                        /* in/out - pointer to client data used in later     */
-                        struct gk20a_cs_snapshot_client **css_client);
+                        struct gk20a_cs_snapshot_client *css_client);
 int gr_gk20a_css_detach(struct channel_gk20a *ch,
                                struct gk20a_cs_snapshot_client *css_client);
author	Deepak Nibade <dnibade@nvidia.com>	2017-11-23 04:03:24 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-11-28 12:46:50 -0500
commit	861b11a968b1f51f45832486e62bfe23fc29fc19 (patch)
tree	3ec0870177b4ce66f151b916661df483d6b2847b
parent	3fbb44d7576238d42635e2ca6501a17cdc7306f7 (diff)

diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index 1ae2d444..0ed596ac 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -29,6 +29,7 @@
29	#include "gk20a/gk20a.h"	29	#include "gk20a/gk20a.h"
30		30
31	#include "channel.h"	31	#include "channel.h"
		32	#include "ioctl_channel.h"
32	#include "os_linux.h"	33	#include "os_linux.h"
33		34
34	#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>	35	#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -242,6 +243,10 @@ static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
242	static void nvgpu_channel_close_linux(struct channel_gk20a *ch)	243	static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
243	{	244	{
244	nvgpu_channel_work_completion_clear(ch);	245	nvgpu_channel_work_completion_clear(ch);
		246
		247	#if defined(CONFIG_GK20A_CYCLE_STATS)
		248	gk20a_channel_free_cycle_stats_snapshot(ch);
		249	#endif
245	}	250	}
246		251
247	static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)	252	static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)


diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 67bec31b..13355605 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -42,6 +42,11 @@
42	#include "os_linux.h"	42	#include "os_linux.h"
43	#include "ctxsw_trace.h"	43	#include "ctxsw_trace.h"
44		44
		45	/* the minimal size of client buffer */
		46	#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
		47	(sizeof(struct gk20a_cs_snapshot_fifo) + \
		48	sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
		49
45	static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)	50	static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
46	{	51	{
47	switch (graphics_preempt_mode) {	52	switch (graphics_preempt_mode) {
@@ -157,18 +162,92 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
157	u32 perfmon_id_count,	162	u32 perfmon_id_count,
158	u32 *perfmon_id_start)	163	u32 *perfmon_id_start)
159	{	164	{
160	int ret;	165	int ret = 0;
		166	struct gk20a *g = ch->g;
		167	struct gk20a_cs_snapshot_client_linux *client_linux;
		168	struct gk20a_cs_snapshot_client *client;
161		169
162	nvgpu_mutex_acquire(&ch->cs_client_mutex);	170	nvgpu_mutex_acquire(&ch->cs_client_mutex);
163	if (ch->cs_client) {	171	if (ch->cs_client) {
164	ret = -EEXIST;	172	nvgpu_mutex_release(&ch->cs_client_mutex);
165	} else {	173	return -EEXIST;
166	ret = gr_gk20a_css_attach(ch,	174	}
167	dmabuf_fd,	175
168	perfmon_id_count,	176	client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
169	perfmon_id_start,	177	if (!client_linux) {
170	&ch->cs_client);	178	ret = -ENOMEM;
		179	goto err;
		180	}
		181
		182	client_linux->dmabuf_fd = dmabuf_fd;
		183	client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
		184	if (IS_ERR(client_linux->dma_handler)) {
		185	ret = PTR_ERR(client_linux->dma_handler);
		186	client_linux->dma_handler = NULL;
		187	goto err_free;
		188	}
		189
		190	client = &client_linux->cs_client;
		191	client->snapshot_size = client_linux->dma_handler->size;
		192	if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
		193	ret = -ENOMEM;
		194	goto err_put;
		195	}
		196
		197	client->snapshot = (struct gk20a_cs_snapshot_fifo *)
		198	dma_buf_vmap(client_linux->dma_handler);
		199	if (!client->snapshot) {
		200	ret = -ENOMEM;
		201	goto err_put;
		202	}
		203
		204	ch->cs_client = client;
		205
		206	ret = gr_gk20a_css_attach(ch,
		207	perfmon_id_count,
		208	perfmon_id_start,
		209	ch->cs_client);
		210
		211	nvgpu_mutex_release(&ch->cs_client_mutex);
		212
		213	return ret;
		214
		215	err_put:
		216	dma_buf_put(client_linux->dma_handler);
		217	err_free:
		218	nvgpu_kfree(g, client_linux);
		219	err:
		220	nvgpu_mutex_release(&ch->cs_client_mutex);
		221	return ret;
		222	}
		223
		224	int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
		225	{
		226	int ret;
		227	struct gk20a_cs_snapshot_client_linux *client_linux;
		228
		229	nvgpu_mutex_acquire(&ch->cs_client_mutex);
		230	if (!ch->cs_client) {
		231	nvgpu_mutex_release(&ch->cs_client_mutex);
		232	return 0;
171	}	233	}
		234
		235	client_linux = container_of(ch->cs_client,
		236	struct gk20a_cs_snapshot_client_linux,
		237	cs_client);
		238
		239	ret = gr_gk20a_css_detach(ch, ch->cs_client);
		240
		241	if (client_linux->dma_handler) {
		242	if (ch->cs_client->snapshot)
		243	dma_buf_vunmap(client_linux->dma_handler,
		244	ch->cs_client->snapshot);
		245	dma_buf_put(client_linux->dma_handler);
		246	}
		247
		248	ch->cs_client = NULL;
		249	nvgpu_kfree(ch->g, client_linux);
		250
172	nvgpu_mutex_release(&ch->cs_client_mutex);	251	nvgpu_mutex_release(&ch->cs_client_mutex);
173		252
174	return ret;	253	return ret;


diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h index 235d84ef..3ea8d765 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
@@ -15,11 +15,20 @@
15		15
16	#include <linux/fs.h>	16	#include <linux/fs.h>
17		17
		18	#include "gk20a/css_gr_gk20a.h"
		19
18	struct inode;	20	struct inode;
19	struct file;	21	struct file;
20	struct gk20a;	22	struct gk20a;
21	struct nvgpu_channel_open_args;	23	struct nvgpu_channel_open_args;
22		24
		25	struct gk20a_cs_snapshot_client_linux {
		26	struct gk20a_cs_snapshot_client cs_client;
		27
		28	u32 dmabuf_fd;
		29	struct dma_buf *dma_handler;
		30	};
		31
23	int gk20a_channel_open(struct inode *inode, struct file *filp);	32	int gk20a_channel_open(struct inode *inode, struct file *filp);
24	int gk20a_channel_release(struct inode *inode, struct file *filp);	33	int gk20a_channel_release(struct inode *inode, struct file *filp);
25	long gk20a_channel_ioctl(struct file *filp,	34	long gk20a_channel_ioctl(struct file *filp,
@@ -27,6 +36,8 @@ long gk20a_channel_ioctl(struct file *filp,
27	int gk20a_channel_open_ioctl(struct gk20a *g,	36	int gk20a_channel_open_ioctl(struct gk20a *g,
28	struct nvgpu_channel_open_args *args);	37	struct nvgpu_channel_open_args *args);
29		38
		39	int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
		40
30	extern const struct file_operations gk20a_event_id_ops;	41	extern const struct file_operations gk20a_event_id_ops;
31	extern const struct file_operations gk20a_channel_ops;	42	extern const struct file_operations gk20a_channel_ops;
32		43


diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index a0415861..dac38739 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -376,22 +376,6 @@ void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
376	nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);	376	nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
377	}	377	}
378		378
379	int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
380	{
381	int ret;
382
383	nvgpu_mutex_acquire(&ch->cs_client_mutex);
384	if (ch->cs_client) {
385	ret = gr_gk20a_css_detach(ch, ch->cs_client);
386	ch->cs_client = NULL;
387	} else {
388	ret = 0;
389	}
390	nvgpu_mutex_release(&ch->cs_client_mutex);
391
392	return ret;
393	}
394
395	#endif	379	#endif
396		380
397	/* call ONLY when no references to the channel exist: after the last put */	381	/* call ONLY when no references to the channel exist: after the last put */
@@ -508,7 +492,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
508		492
509	#if defined(CONFIG_GK20A_CYCLE_STATS)	493	#if defined(CONFIG_GK20A_CYCLE_STATS)
510	gk20a_channel_free_cycle_stats_buffer(ch);	494	gk20a_channel_free_cycle_stats_buffer(ch);
511	gk20a_channel_free_cycle_stats_snapshot(ch);
512	#endif	495	#endif
513		496
514	channel_gk20a_free_priv_cmdbuf(ch);	497	channel_gk20a_free_priv_cmdbuf(ch);


diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index ff96d0d7..87ab6202 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -371,7 +371,6 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
371	unsigned int num_inflight_jobs,	371	unsigned int num_inflight_jobs,
372	u32 flags);	372	u32 flags);
373	void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);	373	void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);
374	int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
375		374
376	void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);	375	void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
377		376


diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index e3896981..afba2496 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -45,11 +45,6 @@
45	((cl)->perfmon_start <= (pm) && \	45	((cl)->perfmon_start <= (pm) && \
46	((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)	46	((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
47		47
48	/* the minimal size of client buffer */
49	#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
50	(sizeof(struct gk20a_cs_snapshot_fifo) + \
51	sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
52
53	/* address of fifo entry by offset */	48	/* address of fifo entry by offset */
54	#define CSS_FIFO_ENTRY(fifo, offs) \	49	#define CSS_FIFO_ENTRY(fifo, offs) \
55	((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))	50	((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))
@@ -452,52 +447,16 @@ static int css_gr_free_client_data(struct gk20a *g,
452	ret = -EINVAL;	447	ret = -EINVAL;
453	}	448	}
454		449
455	if (client->dma_handler) {
456	if (client->snapshot)
457	dma_buf_vunmap(client->dma_handler, client->snapshot);
458	dma_buf_put(client->dma_handler);
459	}
460
461	nvgpu_kfree(g, client);
462
463	return ret;	450	return ret;
464	}	451	}
465		452
466	static int css_gr_create_client_data(struct gk20a *g,	453	static int css_gr_create_client_data(struct gk20a *g,
467	struct gk20a_cs_snapshot *data,	454	struct gk20a_cs_snapshot *data,
468	u32 dmabuf_fd, u32 perfmon_count,	455	u32 perfmon_count,
469	struct gk20a_cs_snapshot_client **client)	456	struct gk20a_cs_snapshot_client *cur)
470	{	457	{
471	struct gk20a_cs_snapshot_client *cur;
472	int ret = 0;	458	int ret = 0;
473		459
474	cur = nvgpu_kzalloc(g, sizeof(*cur));
475	if (!cur) {
476	ret = -ENOMEM;
477	goto failed;
478	}
479
480	cur->dmabuf_fd = dmabuf_fd;
481	cur->dma_handler = dma_buf_get(cur->dmabuf_fd);
482	if (IS_ERR(cur->dma_handler)) {
483	ret = PTR_ERR(cur->dma_handler);
484	cur->dma_handler = NULL;
485	goto failed;
486	}
487
488	cur->snapshot = (struct gk20a_cs_snapshot_fifo *)
489	dma_buf_vmap(cur->dma_handler);
490	if (!cur->snapshot) {
491	ret = -ENOMEM;
492	goto failed;
493	}
494
495	cur->snapshot_size = cur->dma_handler->size;
496	if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
497	ret = -ENOMEM;
498	goto failed;
499	}
500
501	memset(cur->snapshot, 0, sizeof(*cur->snapshot));	460	memset(cur->snapshot, 0, sizeof(*cur->snapshot));
502	cur->snapshot->start = sizeof(*cur->snapshot);	461	cur->snapshot->start = sizeof(*cur->snapshot);
503	/* we should be ensure that can fit all fifo entries here */	462	/* we should be ensure that can fit all fifo entries here */
@@ -523,12 +482,10 @@ static int css_gr_create_client_data(struct gk20a *g,
523	}	482	}
524		483
525	nvgpu_list_add_tail(&cur->list, &data->clients);	484	nvgpu_list_add_tail(&cur->list, &data->clients);
526	*client = cur;
527		485
528	return 0;	486	return 0;
529		487
530	failed:	488	failed:
531	*client = NULL;
532	if (cur)	489	if (cur)
533	css_gr_free_client_data(g, data, cur);	490	css_gr_free_client_data(g, data, cur);
534		491
@@ -537,10 +494,9 @@ failed:
537		494
538		495
539	int gr_gk20a_css_attach(struct channel_gk20a *ch,	496	int gr_gk20a_css_attach(struct channel_gk20a *ch,
540	u32 dmabuf_fd,
541	u32 perfmon_count,	497	u32 perfmon_count,
542	u32 *perfmon_start,	498	u32 *perfmon_start,
543	struct gk20a_cs_snapshot_client **cs_client)	499	struct gk20a_cs_snapshot_client *cs_client)
544	{	500	{
545	int ret = 0;	501	int ret = 0;
546	struct gk20a *g = ch->g;	502	struct gk20a *g = ch->g;
@@ -555,7 +511,6 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
555	return -EINVAL;	511	return -EINVAL;
556		512
557	gr = &g->gr;	513	gr = &g->gr;
558	*cs_client = NULL;
559		514
560	nvgpu_mutex_acquire(&gr->cs_lock);	515	nvgpu_mutex_acquire(&gr->cs_lock);
561		516
@@ -564,18 +519,17 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
564	goto failed;	519	goto failed;
565		520
566	ret = css_gr_create_client_data(g, gr->cs_data,	521	ret = css_gr_create_client_data(g, gr->cs_data,
567	dmabuf_fd,
568	perfmon_count,	522	perfmon_count,
569	cs_client);	523	cs_client);
570	if (ret)	524	if (ret)
571	goto failed;	525	goto failed;
572		526
573	ret = g->ops.css.enable_snapshot(ch, *cs_client);	527	ret = g->ops.css.enable_snapshot(ch, cs_client);
574	if (ret)	528	if (ret)
575	goto failed;	529	goto failed;
576		530
577	if (perfmon_start)	531	if (perfmon_start)
578	*perfmon_start = (*cs_client)->perfmon_start;	532	*perfmon_start = cs_client->perfmon_start;
579		533
580	nvgpu_mutex_release(&gr->cs_lock);	534	nvgpu_mutex_release(&gr->cs_lock);
581		535
@@ -583,9 +537,9 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
583		537
584	failed:	538	failed:
585	if (gr->cs_data) {	539	if (gr->cs_data) {
586	if (*cs_client) {	540	if (cs_client) {
587	css_gr_free_client_data(g, gr->cs_data, *cs_client);	541	css_gr_free_client_data(g, gr->cs_data, cs_client);
588	*cs_client = NULL;	542	cs_client = NULL;
589	}	543	}
590		544
591	if (nvgpu_list_empty(&gr->cs_data->clients))	545	if (nvgpu_list_empty(&gr->cs_data->clients))


diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h index f0ad6044..b6ad9fac 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
@@ -28,6 +28,10 @@
28	/* the minimal size of HW buffer - should be enough to avoid HW overflows */	28	/* the minimal size of HW buffer - should be enough to avoid HW overflows */
29	#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)	29	#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
30		30
		31	struct gk20a;
		32	struct gr_gk20a;
		33	struct channel_gk20a;
		34
31	/* cycle stats fifo header (must match NvSnapshotBufferFifo) */	35	/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
32	struct gk20a_cs_snapshot_fifo {	36	struct gk20a_cs_snapshot_fifo {
33	/* layout description of the buffer */	37	/* layout description of the buffer */
@@ -95,8 +99,6 @@ struct gk20a_cs_snapshot_fifo_entry {
95	/* cycle stats snapshot client data (e.g. associated with channel) */	99	/* cycle stats snapshot client data (e.g. associated with channel) */
96	struct gk20a_cs_snapshot_client {	100	struct gk20a_cs_snapshot_client {
97	struct nvgpu_list_node list;	101	struct nvgpu_list_node list;
98	u32 dmabuf_fd;
99	struct dma_buf *dma_handler;
100	struct gk20a_cs_snapshot_fifo *snapshot;	102	struct gk20a_cs_snapshot_fifo *snapshot;
101	u32 snapshot_size;	103	u32 snapshot_size;
102	u32 perfmon_start;	104	u32 perfmon_start;


diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 5a5809fc..14668dc6 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -699,11 +699,10 @@ int gr_gk20a_halt_pipe(struct gk20a *g);
699		699
700	#if defined(CONFIG_GK20A_CYCLE_STATS)	700	#if defined(CONFIG_GK20A_CYCLE_STATS)
701	int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */	701	int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
702	u32 dmabuf_fd, /* in - dma mapped memory */
703	u32 perfmon_id_count, /* in - number of perfmons*/	702	u32 perfmon_id_count, /* in - number of perfmons*/
704	u32 *perfmon_id_start, /* out- index of first pm */	703	u32 *perfmon_id_start, /* out- index of first pm */
705	/* out - pointer to client data used in later */	704	/* in/out - pointer to client data used in later */
706	struct gk20a_cs_snapshot_client **css_client);	705	struct gk20a_cs_snapshot_client *css_client);
707		706
708	int gr_gk20a_css_detach(struct channel_gk20a *ch,	707	int gr_gk20a_css_detach(struct channel_gk20a *ch,
709	struct gk20a_cs_snapshot_client *css_client);	708	struct gk20a_cs_snapshot_client *css_client);