summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2017-11-23 04:03:24 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2017-11-28 12:46:50 -0500
commit861b11a968b1f51f45832486e62bfe23fc29fc19 (patch)
tree3ec0870177b4ce66f151b916661df483d6b2847b
parent3fbb44d7576238d42635e2ca6501a17cdc7306f7 (diff)
gpu: nvgpu: move snapshot_client memory handling to linux
Right now we store the dmabuf fd and dma_buf pointer for gk20a_cs_snapshot_client. But since dma_buf and all related APIs are Linux specific, we need to remove them from common code and move them to Linux-specific code. Add a new Linux-specific structure, gk20a_cs_snapshot_client_linux, which includes struct gk20a_cs_snapshot_client and the Linux-specific dma_buf pointer. In gk20a_attach_cycle_stats_snapshot(), we first handle all dma_buf related operations and then call gr_gk20a_css_attach(). Move gk20a_channel_free_cycle_stats_snapshot() to ioctl_channel.c. In gk20a_channel_free_cycle_stats_snapshot(), we call gr_gk20a_css_detach() and then free up the dma_buf in Linux-specific code. We also need to call gk20a_channel_free_cycle_stats_snapshot() while closing the channel, so call it from the Linux-specific nvgpu_channel_close_linux(). Jira NVGPU-397 Jira NVGPU-415 Change-Id: Ida27240541f6adf31f28d7d7ee4f51651c6d3de2 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1603908 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/common/linux/channel.c5
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_channel.c95
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_channel.h11
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c17
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c62
-rw-r--r--drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h6
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h5
8 files changed, 117 insertions, 85 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index 1ae2d444..0ed596ac 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -29,6 +29,7 @@
29#include "gk20a/gk20a.h" 29#include "gk20a/gk20a.h"
30 30
31#include "channel.h" 31#include "channel.h"
32#include "ioctl_channel.h"
32#include "os_linux.h" 33#include "os_linux.h"
33 34
34#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> 35#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -242,6 +243,10 @@ static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
242static void nvgpu_channel_close_linux(struct channel_gk20a *ch) 243static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
243{ 244{
244 nvgpu_channel_work_completion_clear(ch); 245 nvgpu_channel_work_completion_clear(ch);
246
247#if defined(CONFIG_GK20A_CYCLE_STATS)
248 gk20a_channel_free_cycle_stats_snapshot(ch);
249#endif
245} 250}
246 251
247static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) 252static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 67bec31b..13355605 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -42,6 +42,11 @@
42#include "os_linux.h" 42#include "os_linux.h"
43#include "ctxsw_trace.h" 43#include "ctxsw_trace.h"
44 44
45/* the minimal size of client buffer */
46#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
47 (sizeof(struct gk20a_cs_snapshot_fifo) + \
48 sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
49
45static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) 50static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
46{ 51{
47 switch (graphics_preempt_mode) { 52 switch (graphics_preempt_mode) {
@@ -157,18 +162,92 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
157 u32 perfmon_id_count, 162 u32 perfmon_id_count,
158 u32 *perfmon_id_start) 163 u32 *perfmon_id_start)
159{ 164{
160 int ret; 165 int ret = 0;
166 struct gk20a *g = ch->g;
167 struct gk20a_cs_snapshot_client_linux *client_linux;
168 struct gk20a_cs_snapshot_client *client;
161 169
162 nvgpu_mutex_acquire(&ch->cs_client_mutex); 170 nvgpu_mutex_acquire(&ch->cs_client_mutex);
163 if (ch->cs_client) { 171 if (ch->cs_client) {
164 ret = -EEXIST; 172 nvgpu_mutex_release(&ch->cs_client_mutex);
165 } else { 173 return -EEXIST;
166 ret = gr_gk20a_css_attach(ch, 174 }
167 dmabuf_fd, 175
168 perfmon_id_count, 176 client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
169 perfmon_id_start, 177 if (!client_linux) {
170 &ch->cs_client); 178 ret = -ENOMEM;
179 goto err;
180 }
181
182 client_linux->dmabuf_fd = dmabuf_fd;
183 client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
184 if (IS_ERR(client_linux->dma_handler)) {
185 ret = PTR_ERR(client_linux->dma_handler);
186 client_linux->dma_handler = NULL;
187 goto err_free;
188 }
189
190 client = &client_linux->cs_client;
191 client->snapshot_size = client_linux->dma_handler->size;
192 if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
193 ret = -ENOMEM;
194 goto err_put;
195 }
196
197 client->snapshot = (struct gk20a_cs_snapshot_fifo *)
198 dma_buf_vmap(client_linux->dma_handler);
199 if (!client->snapshot) {
200 ret = -ENOMEM;
201 goto err_put;
202 }
203
204 ch->cs_client = client;
205
206 ret = gr_gk20a_css_attach(ch,
207 perfmon_id_count,
208 perfmon_id_start,
209 ch->cs_client);
210
211 nvgpu_mutex_release(&ch->cs_client_mutex);
212
213 return ret;
214
215err_put:
216 dma_buf_put(client_linux->dma_handler);
217err_free:
218 nvgpu_kfree(g, client_linux);
219err:
220 nvgpu_mutex_release(&ch->cs_client_mutex);
221 return ret;
222}
223
224int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
225{
226 int ret;
227 struct gk20a_cs_snapshot_client_linux *client_linux;
228
229 nvgpu_mutex_acquire(&ch->cs_client_mutex);
230 if (!ch->cs_client) {
231 nvgpu_mutex_release(&ch->cs_client_mutex);
232 return 0;
171 } 233 }
234
235 client_linux = container_of(ch->cs_client,
236 struct gk20a_cs_snapshot_client_linux,
237 cs_client);
238
239 ret = gr_gk20a_css_detach(ch, ch->cs_client);
240
241 if (client_linux->dma_handler) {
242 if (ch->cs_client->snapshot)
243 dma_buf_vunmap(client_linux->dma_handler,
244 ch->cs_client->snapshot);
245 dma_buf_put(client_linux->dma_handler);
246 }
247
248 ch->cs_client = NULL;
249 nvgpu_kfree(ch->g, client_linux);
250
172 nvgpu_mutex_release(&ch->cs_client_mutex); 251 nvgpu_mutex_release(&ch->cs_client_mutex);
173 252
174 return ret; 253 return ret;
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
index 235d84ef..3ea8d765 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h
@@ -15,11 +15,20 @@
15 15
16#include <linux/fs.h> 16#include <linux/fs.h>
17 17
18#include "gk20a/css_gr_gk20a.h"
19
18struct inode; 20struct inode;
19struct file; 21struct file;
20struct gk20a; 22struct gk20a;
21struct nvgpu_channel_open_args; 23struct nvgpu_channel_open_args;
22 24
25struct gk20a_cs_snapshot_client_linux {
26 struct gk20a_cs_snapshot_client cs_client;
27
28 u32 dmabuf_fd;
29 struct dma_buf *dma_handler;
30};
31
23int gk20a_channel_open(struct inode *inode, struct file *filp); 32int gk20a_channel_open(struct inode *inode, struct file *filp);
24int gk20a_channel_release(struct inode *inode, struct file *filp); 33int gk20a_channel_release(struct inode *inode, struct file *filp);
25long gk20a_channel_ioctl(struct file *filp, 34long gk20a_channel_ioctl(struct file *filp,
@@ -27,6 +36,8 @@ long gk20a_channel_ioctl(struct file *filp,
27int gk20a_channel_open_ioctl(struct gk20a *g, 36int gk20a_channel_open_ioctl(struct gk20a *g,
28 struct nvgpu_channel_open_args *args); 37 struct nvgpu_channel_open_args *args);
29 38
39int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
40
30extern const struct file_operations gk20a_event_id_ops; 41extern const struct file_operations gk20a_event_id_ops;
31extern const struct file_operations gk20a_channel_ops; 42extern const struct file_operations gk20a_channel_ops;
32 43
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a0415861..dac38739 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -376,22 +376,6 @@ void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
376 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); 376 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
377} 377}
378 378
379int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
380{
381 int ret;
382
383 nvgpu_mutex_acquire(&ch->cs_client_mutex);
384 if (ch->cs_client) {
385 ret = gr_gk20a_css_detach(ch, ch->cs_client);
386 ch->cs_client = NULL;
387 } else {
388 ret = 0;
389 }
390 nvgpu_mutex_release(&ch->cs_client_mutex);
391
392 return ret;
393}
394
395#endif 379#endif
396 380
397/* call ONLY when no references to the channel exist: after the last put */ 381/* call ONLY when no references to the channel exist: after the last put */
@@ -508,7 +492,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
508 492
509#if defined(CONFIG_GK20A_CYCLE_STATS) 493#if defined(CONFIG_GK20A_CYCLE_STATS)
510 gk20a_channel_free_cycle_stats_buffer(ch); 494 gk20a_channel_free_cycle_stats_buffer(ch);
511 gk20a_channel_free_cycle_stats_snapshot(ch);
512#endif 495#endif
513 496
514 channel_gk20a_free_priv_cmdbuf(ch); 497 channel_gk20a_free_priv_cmdbuf(ch);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index ff96d0d7..87ab6202 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -371,7 +371,6 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
371 unsigned int num_inflight_jobs, 371 unsigned int num_inflight_jobs,
372 u32 flags); 372 u32 flags);
373void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch); 373void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);
374int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
375 374
376void gk20a_channel_timeout_restart_all_channels(struct gk20a *g); 375void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
377 376
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index e3896981..afba2496 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -45,11 +45,6 @@
45 ((cl)->perfmon_start <= (pm) && \ 45 ((cl)->perfmon_start <= (pm) && \
46 ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) 46 ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
47 47
48/* the minimal size of client buffer */
49#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
50 (sizeof(struct gk20a_cs_snapshot_fifo) + \
51 sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
52
53/* address of fifo entry by offset */ 48/* address of fifo entry by offset */
54#define CSS_FIFO_ENTRY(fifo, offs) \ 49#define CSS_FIFO_ENTRY(fifo, offs) \
55 ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs))) 50 ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))
@@ -452,52 +447,16 @@ static int css_gr_free_client_data(struct gk20a *g,
452 ret = -EINVAL; 447 ret = -EINVAL;
453 } 448 }
454 449
455 if (client->dma_handler) {
456 if (client->snapshot)
457 dma_buf_vunmap(client->dma_handler, client->snapshot);
458 dma_buf_put(client->dma_handler);
459 }
460
461 nvgpu_kfree(g, client);
462
463 return ret; 450 return ret;
464} 451}
465 452
466static int css_gr_create_client_data(struct gk20a *g, 453static int css_gr_create_client_data(struct gk20a *g,
467 struct gk20a_cs_snapshot *data, 454 struct gk20a_cs_snapshot *data,
468 u32 dmabuf_fd, u32 perfmon_count, 455 u32 perfmon_count,
469 struct gk20a_cs_snapshot_client **client) 456 struct gk20a_cs_snapshot_client *cur)
470{ 457{
471 struct gk20a_cs_snapshot_client *cur;
472 int ret = 0; 458 int ret = 0;
473 459
474 cur = nvgpu_kzalloc(g, sizeof(*cur));
475 if (!cur) {
476 ret = -ENOMEM;
477 goto failed;
478 }
479
480 cur->dmabuf_fd = dmabuf_fd;
481 cur->dma_handler = dma_buf_get(cur->dmabuf_fd);
482 if (IS_ERR(cur->dma_handler)) {
483 ret = PTR_ERR(cur->dma_handler);
484 cur->dma_handler = NULL;
485 goto failed;
486 }
487
488 cur->snapshot = (struct gk20a_cs_snapshot_fifo *)
489 dma_buf_vmap(cur->dma_handler);
490 if (!cur->snapshot) {
491 ret = -ENOMEM;
492 goto failed;
493 }
494
495 cur->snapshot_size = cur->dma_handler->size;
496 if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
497 ret = -ENOMEM;
498 goto failed;
499 }
500
501 memset(cur->snapshot, 0, sizeof(*cur->snapshot)); 460 memset(cur->snapshot, 0, sizeof(*cur->snapshot));
502 cur->snapshot->start = sizeof(*cur->snapshot); 461 cur->snapshot->start = sizeof(*cur->snapshot);
503 /* we should be ensure that can fit all fifo entries here */ 462 /* we should be ensure that can fit all fifo entries here */
@@ -523,12 +482,10 @@ static int css_gr_create_client_data(struct gk20a *g,
523 } 482 }
524 483
525 nvgpu_list_add_tail(&cur->list, &data->clients); 484 nvgpu_list_add_tail(&cur->list, &data->clients);
526 *client = cur;
527 485
528 return 0; 486 return 0;
529 487
530failed: 488failed:
531 *client = NULL;
532 if (cur) 489 if (cur)
533 css_gr_free_client_data(g, data, cur); 490 css_gr_free_client_data(g, data, cur);
534 491
@@ -537,10 +494,9 @@ failed:
537 494
538 495
539int gr_gk20a_css_attach(struct channel_gk20a *ch, 496int gr_gk20a_css_attach(struct channel_gk20a *ch,
540 u32 dmabuf_fd,
541 u32 perfmon_count, 497 u32 perfmon_count,
542 u32 *perfmon_start, 498 u32 *perfmon_start,
543 struct gk20a_cs_snapshot_client **cs_client) 499 struct gk20a_cs_snapshot_client *cs_client)
544{ 500{
545 int ret = 0; 501 int ret = 0;
546 struct gk20a *g = ch->g; 502 struct gk20a *g = ch->g;
@@ -555,7 +511,6 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
555 return -EINVAL; 511 return -EINVAL;
556 512
557 gr = &g->gr; 513 gr = &g->gr;
558 *cs_client = NULL;
559 514
560 nvgpu_mutex_acquire(&gr->cs_lock); 515 nvgpu_mutex_acquire(&gr->cs_lock);
561 516
@@ -564,18 +519,17 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
564 goto failed; 519 goto failed;
565 520
566 ret = css_gr_create_client_data(g, gr->cs_data, 521 ret = css_gr_create_client_data(g, gr->cs_data,
567 dmabuf_fd,
568 perfmon_count, 522 perfmon_count,
569 cs_client); 523 cs_client);
570 if (ret) 524 if (ret)
571 goto failed; 525 goto failed;
572 526
573 ret = g->ops.css.enable_snapshot(ch, *cs_client); 527 ret = g->ops.css.enable_snapshot(ch, cs_client);
574 if (ret) 528 if (ret)
575 goto failed; 529 goto failed;
576 530
577 if (perfmon_start) 531 if (perfmon_start)
578 *perfmon_start = (*cs_client)->perfmon_start; 532 *perfmon_start = cs_client->perfmon_start;
579 533
580 nvgpu_mutex_release(&gr->cs_lock); 534 nvgpu_mutex_release(&gr->cs_lock);
581 535
@@ -583,9 +537,9 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
583 537
584failed: 538failed:
585 if (gr->cs_data) { 539 if (gr->cs_data) {
586 if (*cs_client) { 540 if (cs_client) {
587 css_gr_free_client_data(g, gr->cs_data, *cs_client); 541 css_gr_free_client_data(g, gr->cs_data, cs_client);
588 *cs_client = NULL; 542 cs_client = NULL;
589 } 543 }
590 544
591 if (nvgpu_list_empty(&gr->cs_data->clients)) 545 if (nvgpu_list_empty(&gr->cs_data->clients))
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
index f0ad6044..b6ad9fac 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
@@ -28,6 +28,10 @@
28/* the minimal size of HW buffer - should be enough to avoid HW overflows */ 28/* the minimal size of HW buffer - should be enough to avoid HW overflows */
29#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) 29#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
30 30
31struct gk20a;
32struct gr_gk20a;
33struct channel_gk20a;
34
31/* cycle stats fifo header (must match NvSnapshotBufferFifo) */ 35/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
32struct gk20a_cs_snapshot_fifo { 36struct gk20a_cs_snapshot_fifo {
33 /* layout description of the buffer */ 37 /* layout description of the buffer */
@@ -95,8 +99,6 @@ struct gk20a_cs_snapshot_fifo_entry {
95/* cycle stats snapshot client data (e.g. associated with channel) */ 99/* cycle stats snapshot client data (e.g. associated with channel) */
96struct gk20a_cs_snapshot_client { 100struct gk20a_cs_snapshot_client {
97 struct nvgpu_list_node list; 101 struct nvgpu_list_node list;
98 u32 dmabuf_fd;
99 struct dma_buf *dma_handler;
100 struct gk20a_cs_snapshot_fifo *snapshot; 102 struct gk20a_cs_snapshot_fifo *snapshot;
101 u32 snapshot_size; 103 u32 snapshot_size;
102 u32 perfmon_start; 104 u32 perfmon_start;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 5a5809fc..14668dc6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -699,11 +699,10 @@ int gr_gk20a_halt_pipe(struct gk20a *g);
699 699
700#if defined(CONFIG_GK20A_CYCLE_STATS) 700#if defined(CONFIG_GK20A_CYCLE_STATS)
701int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */ 701int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
702 u32 dmabuf_fd, /* in - dma mapped memory */
703 u32 perfmon_id_count, /* in - number of perfmons*/ 702 u32 perfmon_id_count, /* in - number of perfmons*/
704 u32 *perfmon_id_start, /* out- index of first pm */ 703 u32 *perfmon_id_start, /* out- index of first pm */
705 /* out - pointer to client data used in later */ 704 /* in/out - pointer to client data used in later */
706 struct gk20a_cs_snapshot_client **css_client); 705 struct gk20a_cs_snapshot_client *css_client);
707 706
708int gr_gk20a_css_detach(struct channel_gk20a *ch, 707int gr_gk20a_css_detach(struct channel_gk20a *ch,
709 struct gk20a_cs_snapshot_client *css_client); 708 struct gk20a_cs_snapshot_client *css_client);