From 861b11a968b1f51f45832486e62bfe23fc29fc19 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Thu, 23 Nov 2017 01:03:24 -0800 Subject: gpu: nvgpu: move snapshot_client memory handling to linux We currently store the dmabuf fd and dma_buf pointer in gk20a_cs_snapshot_client. But since dma_buf and all related APIs are Linux-specific, we need to remove them from common code and move them to Linux-specific code. Add a new Linux-specific structure, gk20a_cs_snapshot_client_linux, which includes struct gk20a_cs_snapshot_client and the Linux-specific dma_buf pointer. In gk20a_attach_cycle_stats_snapshot(), we first handle all dma_buf-related operations and then call gr_gk20a_css_attach(). Move gk20a_channel_free_cycle_stats_snapshot() to ioctl_channel.c. In gk20a_channel_free_cycle_stats_snapshot(), we call gr_gk20a_css_detach() and then free up the dma_buf in Linux-specific code. We also need to call gk20a_channel_free_cycle_stats_snapshot() while closing the channel, so call it from the Linux-specific nvgpu_channel_close_linux(). Jira NVGPU-397 Jira NVGPU-415 Change-Id: Ida27240541f6adf31f28d7d7ee4f51651c6d3de2 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1603908 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/channel.c | 5 ++ drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 95 +++++++++++++++++++++++--- drivers/gpu/nvgpu/common/linux/ioctl_channel.h | 11 +++ drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 17 ----- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 1 - drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 62 +++-------------- drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | 6 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 5 +- 8 files changed, 117 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index 1ae2d444..0ed596ac 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ 
b/drivers/gpu/nvgpu/common/linux/channel.c @@ -29,6 +29,7 @@ #include "gk20a/gk20a.h" #include "channel.h" +#include "ioctl_channel.h" #include "os_linux.h" #include @@ -242,6 +243,10 @@ static void nvgpu_channel_open_linux(struct channel_gk20a *ch) static void nvgpu_channel_close_linux(struct channel_gk20a *ch) { nvgpu_channel_work_completion_clear(ch); + +#if defined(CONFIG_GK20A_CYCLE_STATS) + gk20a_channel_free_cycle_stats_snapshot(ch); +#endif } static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 67bec31b..13355605 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c @@ -42,6 +42,11 @@ #include "os_linux.h" #include "ctxsw_trace.h" +/* the minimal size of client buffer */ +#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ + (sizeof(struct gk20a_cs_snapshot_fifo) + \ + sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) + static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) { switch (graphics_preempt_mode) { @@ -157,18 +162,92 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, u32 perfmon_id_count, u32 *perfmon_id_start) { - int ret; + int ret = 0; + struct gk20a *g = ch->g; + struct gk20a_cs_snapshot_client_linux *client_linux; + struct gk20a_cs_snapshot_client *client; nvgpu_mutex_acquire(&ch->cs_client_mutex); if (ch->cs_client) { - ret = -EEXIST; - } else { - ret = gr_gk20a_css_attach(ch, - dmabuf_fd, - perfmon_id_count, - perfmon_id_start, - &ch->cs_client); + nvgpu_mutex_release(&ch->cs_client_mutex); + return -EEXIST; + } + + client_linux = nvgpu_kzalloc(g, sizeof(*client_linux)); + if (!client_linux) { + ret = -ENOMEM; + goto err; + } + + client_linux->dmabuf_fd = dmabuf_fd; + client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd); + if (IS_ERR(client_linux->dma_handler)) { + ret = 
PTR_ERR(client_linux->dma_handler); + client_linux->dma_handler = NULL; + goto err_free; + } + + client = &client_linux->cs_client; + client->snapshot_size = client_linux->dma_handler->size; + if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { + ret = -ENOMEM; + goto err_put; + } + + client->snapshot = (struct gk20a_cs_snapshot_fifo *) + dma_buf_vmap(client_linux->dma_handler); + if (!client->snapshot) { + ret = -ENOMEM; + goto err_put; + } + + ch->cs_client = client; + + ret = gr_gk20a_css_attach(ch, + perfmon_id_count, + perfmon_id_start, + ch->cs_client); + + nvgpu_mutex_release(&ch->cs_client_mutex); + + return ret; + +err_put: + dma_buf_put(client_linux->dma_handler); +err_free: + nvgpu_kfree(g, client_linux); +err: + nvgpu_mutex_release(&ch->cs_client_mutex); + return ret; +} + +int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) +{ + int ret; + struct gk20a_cs_snapshot_client_linux *client_linux; + + nvgpu_mutex_acquire(&ch->cs_client_mutex); + if (!ch->cs_client) { + nvgpu_mutex_release(&ch->cs_client_mutex); + return 0; } + + client_linux = container_of(ch->cs_client, + struct gk20a_cs_snapshot_client_linux, + cs_client); + + ret = gr_gk20a_css_detach(ch, ch->cs_client); + + if (client_linux->dma_handler) { + if (ch->cs_client->snapshot) + dma_buf_vunmap(client_linux->dma_handler, + ch->cs_client->snapshot); + dma_buf_put(client_linux->dma_handler); + } + + ch->cs_client = NULL; + nvgpu_kfree(ch->g, client_linux); + nvgpu_mutex_release(&ch->cs_client_mutex); return ret; diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h index 235d84ef..3ea8d765 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h @@ -15,11 +15,20 @@ #include +#include "gk20a/css_gr_gk20a.h" + struct inode; struct file; struct gk20a; struct nvgpu_channel_open_args; +struct gk20a_cs_snapshot_client_linux { + struct gk20a_cs_snapshot_client 
cs_client; + + u32 dmabuf_fd; + struct dma_buf *dma_handler; +}; + int gk20a_channel_open(struct inode *inode, struct file *filp); int gk20a_channel_release(struct inode *inode, struct file *filp); long gk20a_channel_ioctl(struct file *filp, @@ -27,6 +36,8 @@ long gk20a_channel_ioctl(struct file *filp, int gk20a_channel_open_ioctl(struct gk20a *g, struct nvgpu_channel_open_args *args); +int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch); + extern const struct file_operations gk20a_event_id_ops; extern const struct file_operations gk20a_channel_ops; diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index a0415861..dac38739 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -376,22 +376,6 @@ void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch) nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); } -int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) -{ - int ret; - - nvgpu_mutex_acquire(&ch->cs_client_mutex); - if (ch->cs_client) { - ret = gr_gk20a_css_detach(ch, ch->cs_client); - ch->cs_client = NULL; - } else { - ret = 0; - } - nvgpu_mutex_release(&ch->cs_client_mutex); - - return ret; -} - #endif /* call ONLY when no references to the channel exist: after the last put */ @@ -508,7 +492,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) #if defined(CONFIG_GK20A_CYCLE_STATS) gk20a_channel_free_cycle_stats_buffer(ch); - gk20a_channel_free_cycle_stats_snapshot(ch); #endif channel_gk20a_free_priv_cmdbuf(ch); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index ff96d0d7..87ab6202 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -371,7 +371,6 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c, unsigned int num_inflight_jobs, u32 flags); void gk20a_channel_free_cycle_stats_buffer(struct 
channel_gk20a *ch); -int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch); void gk20a_channel_timeout_restart_all_channels(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index e3896981..afba2496 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c @@ -45,11 +45,6 @@ ((cl)->perfmon_start <= (pm) && \ ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) -/* the minimal size of client buffer */ -#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ - (sizeof(struct gk20a_cs_snapshot_fifo) + \ - sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) - /* address of fifo entry by offset */ #define CSS_FIFO_ENTRY(fifo, offs) \ ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs))) @@ -452,52 +447,16 @@ static int css_gr_free_client_data(struct gk20a *g, ret = -EINVAL; } - if (client->dma_handler) { - if (client->snapshot) - dma_buf_vunmap(client->dma_handler, client->snapshot); - dma_buf_put(client->dma_handler); - } - - nvgpu_kfree(g, client); - return ret; } static int css_gr_create_client_data(struct gk20a *g, struct gk20a_cs_snapshot *data, - u32 dmabuf_fd, u32 perfmon_count, - struct gk20a_cs_snapshot_client **client) + u32 perfmon_count, + struct gk20a_cs_snapshot_client *cur) { - struct gk20a_cs_snapshot_client *cur; int ret = 0; - cur = nvgpu_kzalloc(g, sizeof(*cur)); - if (!cur) { - ret = -ENOMEM; - goto failed; - } - - cur->dmabuf_fd = dmabuf_fd; - cur->dma_handler = dma_buf_get(cur->dmabuf_fd); - if (IS_ERR(cur->dma_handler)) { - ret = PTR_ERR(cur->dma_handler); - cur->dma_handler = NULL; - goto failed; - } - - cur->snapshot = (struct gk20a_cs_snapshot_fifo *) - dma_buf_vmap(cur->dma_handler); - if (!cur->snapshot) { - ret = -ENOMEM; - goto failed; - } - - cur->snapshot_size = cur->dma_handler->size; - if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { - ret = -ENOMEM; - goto failed; - } - memset(cur->snapshot, 0, 
sizeof(*cur->snapshot)); cur->snapshot->start = sizeof(*cur->snapshot); /* we should be ensure that can fit all fifo entries here */ @@ -523,12 +482,10 @@ static int css_gr_create_client_data(struct gk20a *g, } nvgpu_list_add_tail(&cur->list, &data->clients); - *client = cur; return 0; failed: - *client = NULL; if (cur) css_gr_free_client_data(g, data, cur); @@ -537,10 +494,9 @@ failed: int gr_gk20a_css_attach(struct channel_gk20a *ch, - u32 dmabuf_fd, u32 perfmon_count, u32 *perfmon_start, - struct gk20a_cs_snapshot_client **cs_client) + struct gk20a_cs_snapshot_client *cs_client) { int ret = 0; struct gk20a *g = ch->g; @@ -555,7 +511,6 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch, return -EINVAL; gr = &g->gr; - *cs_client = NULL; nvgpu_mutex_acquire(&gr->cs_lock); @@ -564,18 +519,17 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch, goto failed; ret = css_gr_create_client_data(g, gr->cs_data, - dmabuf_fd, perfmon_count, cs_client); if (ret) goto failed; - ret = g->ops.css.enable_snapshot(ch, *cs_client); + ret = g->ops.css.enable_snapshot(ch, cs_client); if (ret) goto failed; if (perfmon_start) - *perfmon_start = (*cs_client)->perfmon_start; + *perfmon_start = cs_client->perfmon_start; nvgpu_mutex_release(&gr->cs_lock); @@ -583,9 +537,9 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch, failed: if (gr->cs_data) { - if (*cs_client) { - css_gr_free_client_data(g, gr->cs_data, *cs_client); - *cs_client = NULL; + if (cs_client) { + css_gr_free_client_data(g, gr->cs_data, cs_client); + cs_client = NULL; } if (nvgpu_list_empty(&gr->cs_data->clients)) diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h index f0ad6044..b6ad9fac 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h @@ -28,6 +28,10 @@ /* the minimal size of HW buffer - should be enough to avoid HW overflows */ #define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) +struct gk20a; +struct gr_gk20a; +struct 
channel_gk20a; + /* cycle stats fifo header (must match NvSnapshotBufferFifo) */ struct gk20a_cs_snapshot_fifo { /* layout description of the buffer */ @@ -95,8 +99,6 @@ struct gk20a_cs_snapshot_fifo_entry { /* cycle stats snapshot client data (e.g. associated with channel) */ struct gk20a_cs_snapshot_client { struct nvgpu_list_node list; - u32 dmabuf_fd; - struct dma_buf *dma_handler; struct gk20a_cs_snapshot_fifo *snapshot; u32 snapshot_size; u32 perfmon_start; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 5a5809fc..14668dc6 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -699,11 +699,10 @@ int gr_gk20a_halt_pipe(struct gk20a *g); #if defined(CONFIG_GK20A_CYCLE_STATS) int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */ - u32 dmabuf_fd, /* in - dma mapped memory */ u32 perfmon_id_count, /* in - number of perfmons*/ u32 *perfmon_id_start, /* out- index of first pm */ - /* out - pointer to client data used in later */ - struct gk20a_cs_snapshot_client **css_client); + /* in/out - pointer to client data used in later */ + struct gk20a_cs_snapshot_client *css_client); int gr_gk20a_css_detach(struct channel_gk20a *ch, struct gk20a_cs_snapshot_client *css_client); -- cgit v1.2.2