Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c	233
1 file changed, 99 insertions(+), 134 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 62f60761..71614d6e 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -25,93 +25,13 @@
 #include "gk20a.h"
 #include "hw_perf_gk20a.h"
 #include "hw_mc_gk20a.h"
-
-
-
-/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
-struct gk20a_cs_snapshot_fifo {
-	/* layout description of the buffer */
-	u32 start;
-	u32 end;
-
-	/* snafu bits */
-	u32 hw_overflow_events_occured;
-	u32 sw_overflow_events_occured;
-
-	/* the kernel copies new entries to put and
-	 * increment the put++. if put == get then
-	 * overflowEventsOccured++
-	 */
-	u32 put;
-	u32 _reserved10;
-	u32 _reserved11;
-	u32 _reserved12;
-
-	/* the driver/client reads from get until
-	 * put==get, get++ */
-	u32 get;
-	u32 _reserved20;
-	u32 _reserved21;
-	u32 _reserved22;
-
-	/* unused */
-	u32 _reserved30;
-	u32 _reserved31;
-	u32 _reserved32;
-	u32 _reserved33;
-};
-
-/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
-struct gk20a_cs_snapshot_fifo_entry {
-	/* global 48 timestamp */
-	u32 timestamp31_00:32;
-	u32 timestamp39_32:8;
-
-	/* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
-	u32 perfmon_id:8;
-
-	/* typically samples_counter is wired to #pmtrigger count */
-	u32 samples_counter:12;
-
-	/* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
-	u32 ds:1;
-	u32 sz:1;
-	u32 zero0:1;
-	u32 zero1:1;
-
-	/* counter results */
-	u32 event_cnt:32;
-	u32 trigger0_cnt:32;
-	u32 trigger1_cnt:32;
-	u32 sample_cnt:32;
-
-	/* Local PmTrigger results for Maxwell+ or padding otherwise */
-	u16 local_trigger_b_count:16;
-	u16 book_mark_b:16;
-	u16 local_trigger_a_count:16;
-	u16 book_mark_a:16;
-};
-
-
-/* cycle stats snapshot client data (e.g. associated with channel) */
-struct gk20a_cs_snapshot_client {
-	struct list_head list;
-	u32 dmabuf_fd;
-	struct dma_buf *dma_handler;
-	struct gk20a_cs_snapshot_fifo *snapshot;
-	u32 snapshot_size;
-	u32 perfmon_start;
-	u32 perfmon_count;
-};
+#include "css_gr_gk20a.h"
 
 /* check client for pointed perfmon ownership */
 #define CONTAINS_PERFMON(cl, pm) \
 	((cl)->perfmon_start <= (pm) && \
 	 ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
 
-/* the minimal size of HW buffer - should be enough to avoid HW overflows */
-#define CSS_MIN_HW_SNAPSHOT_SIZE	(8 * 1024 * 1024)
-
 /* the minimal size of client buffer */
 #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
 	(sizeof(struct gk20a_cs_snapshot_fifo) + \
@@ -131,20 +51,6 @@ struct gk20a_cs_snapshot_client {
 /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
 #define CSS_MAX_PERFMON_IDS	256
 
-/* local definitions to avoid hardcodes sizes and shifts */
-#define PM_BITMAP_SIZE	DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
-
-/* cycle stats snapshot control structure for one HW entry and many clients */
-struct gk20a_cs_snapshot {
-	unsigned long perfmon_ids[PM_BITMAP_SIZE];
-	struct list_head clients;
-	struct mem_desc hw_memdesc;
-	/* pointer to allocated cpu_va memory where GPU place data */
-	struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
-	struct gk20a_cs_snapshot_fifo_entry *hw_end;
-	struct gk20a_cs_snapshot_fifo_entry *hw_get;
-};
-
 /* reports whether the hw queue overflowed */
 static inline bool css_hw_get_overflow_status(struct gk20a *g)
 {
@@ -215,10 +121,13 @@ static int css_gr_create_shared_data(struct gr_gk20a *gr)
 	return 0;
 }
 
-static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size)
+static int css_hw_enable_snapshot(struct channel_gk20a *ch,
+		struct gk20a_cs_snapshot_client *cs_client)
 {
-	struct gk20a *g = gr->g;
+	struct gk20a *g = ch->g;
+	struct gr_gk20a *gr = &g->gr;
 	struct gk20a_cs_snapshot *data = gr->cs_data;
+	u32 snapshot_size = cs_client->snapshot_size;
 	int ret;
 
 	u32 virt_addr_lo;
@@ -317,9 +226,11 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr)
 
 static void css_gr_free_shared_data(struct gr_gk20a *gr)
 {
+	struct gk20a *g = gr->g;
+
 	if (gr->cs_data) {
 		/* the clients list is expected to be empty */
-		css_hw_disable_snapshot(gr);
+		g->ops.css.disable_snapshot(gr);
 
 		/* release the objects */
 		kfree(gr->cs_data);
@@ -344,12 +255,15 @@ css_gr_search_client(struct list_head *clients, u32 perfmon)
 	return NULL;
 }
 
-static int css_gr_flush_snapshots(struct gr_gk20a *gr)
+static int css_gr_flush_snapshots(struct channel_gk20a *ch)
 {
-	struct gk20a *g = gr->g;
+	struct gk20a *g = ch->g;
+	struct gr_gk20a *gr = &g->gr;
 	struct gk20a_cs_snapshot *css = gr->cs_data;
 	struct gk20a_cs_snapshot_client *cur;
-	u32 pending;
+	u32 pending, completed;
+	bool hw_overflow;
+	int err;
 
 	/* variables for iterating over HW entries */
 	u32 sid;
@@ -360,24 +274,25 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 	struct gk20a_cs_snapshot_fifo *dst;
 	struct gk20a_cs_snapshot_fifo_entry *dst_get;
 	struct gk20a_cs_snapshot_fifo_entry *dst_put;
+	struct gk20a_cs_snapshot_fifo_entry *dst_nxt;
 	struct gk20a_cs_snapshot_fifo_entry *dst_head;
 	struct gk20a_cs_snapshot_fifo_entry *dst_tail;
 
 	if (!css)
 		return -EINVAL;
 
-	if (!css->hw_snapshot)
-		return -EINVAL;
-
 	if (list_empty(&css->clients))
 		return -EBADF;
 
 	/* check data available */
-	pending = css_hw_get_pending_snapshots(g);
+	err = g->ops.css.check_data_available(ch, &pending, &hw_overflow);
+	if (err)
+		return err;
+
 	if (!pending)
 		return 0;
 
-	if (css_hw_get_overflow_status(g)) {
+	if (hw_overflow) {
 		struct list_head *pos;
 
 		list_for_each(pos, &css->clients) {
@@ -387,11 +302,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 		}
 
 		gk20a_warn(dev_from_gk20a(g),
 			"cyclestats: hardware overflow detected\n");
 	}
 
-	/* proceed all items in HW buffer */
+	/* process all items in HW buffer */
 	sid = 0;
+	completed = 0;
 	cur = NULL;
 	dst = NULL;
 	dst_put = NULL;
@@ -419,7 +335,11 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 			dst_get = CSS_FIFO_ENTRY(dst, dst->get);
 			dst_put = CSS_FIFO_ENTRY(dst, dst->put);
 			dst_head = CSS_FIFO_ENTRY(dst, dst->start);
-			dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1;
+			dst_tail = CSS_FIFO_ENTRY(dst, dst->end);
+
+			dst_nxt = dst_put + 1;
+			if (dst_nxt == dst_tail)
+				dst_nxt = dst_head;
 		} else {
 			/* client not found - skipping this entry */
 			gk20a_warn(dev_from_gk20a(g),
@@ -430,8 +350,7 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 		}
 
 		/* check for software overflows */
-		if (dst_put + 1 == dst_get ||
-		    (dst_put == dst_tail && dst_get == dst_head)) {
+		if (dst_nxt == dst_get) {
 			/* no data copy, no pointer updates */
 			dst->sw_overflow_events_occured++;
 			gk20a_warn(dev_from_gk20a(g),
@@ -439,10 +358,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 				src->perfmon_id);
 		} else {
 			*dst_put = *src;
-			if (dst_put == dst_tail)
-				dst_put = dst_head;
-			else
-				dst_put++;
+			completed++;
+
+			dst_put = dst_nxt++;
+
+			if (dst_nxt == dst_tail)
+				dst_nxt = dst_head;
 		}
 
 next_hw_fifo_entry:
@@ -465,14 +386,17 @@ next_hw_fifo_entry:
 			(css->hw_end - css->hw_get) * sizeof(*src));
 	}
 	gr->cs_data->hw_get = src;
-	css_hw_set_handled_snapshots(g, sid);
-	if (pending != sid) {
+
+	if (g->ops.css.set_handled_snapshots)
+		g->ops.css.set_handled_snapshots(g, sid);
+
+	if (completed != sid) {
 		/* not all entries proceed correctly. some of problems */
 		/* reported as overflows, some as orphaned perfmons, */
 		/* but it will be better notify with summary about it */
 		gk20a_warn(dev_from_gk20a(g),
-			"cyclestats: done %u from %u entries\n",
-			sid, pending);
+			"cyclestats: completed %u from %u entries\n",
+			completed, pending);
 	}
 
 	return 0;
@@ -511,7 +435,8 @@ static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
 }
 
 
-static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
+static int css_gr_free_client_data(struct gk20a *g,
+				   struct gk20a_cs_snapshot *data,
 				   struct gk20a_cs_snapshot_client *client)
 {
 	int ret = 0;
@@ -519,8 +444,9 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
 	if (client->list.next && client->list.prev)
 		list_del(&client->list);
 
-	if (client->perfmon_start && client->perfmon_count) {
-		if (client->perfmon_count != css_gr_release_perfmon_ids(data,
+	if (client->perfmon_start && client->perfmon_count
+	    && g->ops.css.release_perfmon_ids) {
+		if (client->perfmon_count != g->ops.css.release_perfmon_ids(data,
 				client->perfmon_start, client->perfmon_count))
 			ret = -EINVAL;
 	}
@@ -536,7 +462,8 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
 	return ret;
 }
 
-static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
+static int css_gr_create_client_data(struct gk20a *g,
+				     struct gk20a_cs_snapshot *data,
 				     u32 dmabuf_fd, u32 perfmon_count,
 				     struct gk20a_cs_snapshot_client **client)
 {
@@ -581,8 +508,12 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
 	cur->snapshot->put = cur->snapshot->start;
 
 	cur->perfmon_count = perfmon_count;
-	if (cur->perfmon_count) {
-		cur->perfmon_start = css_gr_allocate_perfmon_ids(data,
+
+	/* In virtual case, perfmon ID allocation is handled by the server
+	 * at the time of the attach (allocate_perfmon_ids is NULL in this case)
+	 */
+	if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) {
+		cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data,
 						cur->perfmon_count);
 		if (!cur->perfmon_start) {
 			ret = -ENOENT;
@@ -598,19 +529,20 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
 failed:
 	*client = NULL;
 	if (cur)
-		css_gr_free_client_data(data, cur);
+		css_gr_free_client_data(g, data, cur);
 
 	return ret;
 }
 
 
-int gr_gk20a_css_attach(struct gk20a *g,
+int gr_gk20a_css_attach(struct channel_gk20a *ch,
 			u32 dmabuf_fd,
 			u32 perfmon_count,
 			u32 *perfmon_start,
 			struct gk20a_cs_snapshot_client **cs_client)
 {
 	int ret = 0;
+	struct gk20a *g = ch->g;
 	struct gr_gk20a *gr;
 
 	/* we must have a placeholder to store pointer to client structure */
@@ -630,14 +562,14 @@ int gr_gk20a_css_attach(struct gk20a *g,
 	if (ret)
 		goto failed;
 
-	ret = css_gr_create_client_data(gr->cs_data,
+	ret = css_gr_create_client_data(g, gr->cs_data,
 			dmabuf_fd,
 			perfmon_count,
 			cs_client);
 	if (ret)
 		goto failed;
 
-	ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size);
+	ret = g->ops.css.enable_snapshot(ch, *cs_client);
 	if (ret)
 		goto failed;
 
@@ -651,7 +583,7 @@ int gr_gk20a_css_attach(struct gk20a *g,
 failed:
 	if (gr->cs_data) {
 		if (*cs_client) {
-			css_gr_free_client_data(gr->cs_data, *cs_client);
+			css_gr_free_client_data(g, gr->cs_data, *cs_client);
 			*cs_client = NULL;
 		}
 
@@ -666,10 +598,11 @@ failed:
 	return ret;
 }
 
-int gr_gk20a_css_detach(struct gk20a *g,
+int gr_gk20a_css_detach(struct channel_gk20a *ch,
 			struct gk20a_cs_snapshot_client *cs_client)
 {
 	int ret = 0;
+	struct gk20a *g = ch->g;
 	struct gr_gk20a *gr;
 
 	if (!cs_client)
@@ -680,7 +613,10 @@ int gr_gk20a_css_detach(struct gk20a *g,
 	if (gr->cs_data) {
 		struct gk20a_cs_snapshot *data = gr->cs_data;
 
-		ret = css_gr_free_client_data(data, cs_client);
+		if (g->ops.css.detach_snapshot)
+			g->ops.css.detach_snapshot(ch, cs_client);
+
+		ret = css_gr_free_client_data(g, data, cs_client);
 		if (list_empty(&data->clients))
 			css_gr_free_shared_data(gr);
 	} else {
@@ -691,10 +627,11 @@ int gr_gk20a_css_detach(struct gk20a *g,
 	return ret;
 }
 
-int gr_gk20a_css_flush(struct gk20a *g,
+int gr_gk20a_css_flush(struct channel_gk20a *ch,
 		       struct gk20a_cs_snapshot_client *cs_client)
 {
 	int ret = 0;
+	struct gk20a *g = ch->g;
 	struct gr_gk20a *gr;
 
 	if (!cs_client)
@@ -702,7 +639,7 @@ int gr_gk20a_css_flush(struct gk20a *g,
 
 	gr = &g->gr;
 	mutex_lock(&gr->cs_lock);
-	ret = css_gr_flush_snapshots(gr);
+	ret = css_gr_flush_snapshots(ch);
 	mutex_unlock(&gr->cs_lock);
 
 	return ret;
@@ -718,3 +655,31 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
 	mutex_unlock(&gr->cs_lock);
 	mutex_destroy(&gr->cs_lock);
 }
+
+static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
+					bool *hw_overflow)
+{
+	struct gk20a *g = ch->g;
+	struct gr_gk20a *gr = &g->gr;
+	struct gk20a_cs_snapshot *css = gr->cs_data;
+
+	if (!css->hw_snapshot)
+		return -EINVAL;
+
+	*pending = css_hw_get_pending_snapshots(g);
+	if (!*pending)
+		return 0;
+
+	*hw_overflow = css_hw_get_overflow_status(g);
+	return 0;
+}
+
+void gk20a_init_css_ops(struct gpu_ops *gops)
+{
+	gops->css.enable_snapshot = css_hw_enable_snapshot;
+	gops->css.disable_snapshot = css_hw_disable_snapshot;
+	gops->css.check_data_available = css_hw_check_data_available;
+	gops->css.set_handled_snapshots = css_hw_set_handled_snapshots;
+	gops->css.allocate_perfmon_ids = css_gr_allocate_perfmon_ids;
+	gops->css.release_perfmon_ids = css_gr_release_perfmon_ids;
+}
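
The end of the patch is where the restructuring pays off: gk20a_init_css_ops() registers the native hardware implementations in a per-chip gpu_ops.css function table, and the common code above now calls through that table, testing the optional hooks (set_handled_snapshots, allocate_perfmon_ids, release_perfmon_ids, detach_snapshot) for NULL so a virtualized backend can leave the server-managed ones unset. A minimal standalone sketch of that optional-op pattern follows; the struct and function names here are simplified stand-ins, not the driver's real types.

/*
 * Sketch of an ops table with optional hooks (hypothetical names).
 * Mandatory ops are always provided by a backend; optional ones may be
 * NULL, e.g. when perfmon IDs are managed on the server side.
 */
struct css_snapshot_ops {
	/* mandatory ops */
	int  (*enable_snapshot)(void *ch, void *cs_client);
	void (*disable_snapshot)(void *gr);
	int  (*check_data_available)(void *ch, unsigned int *pending,
				     int *hw_overflow);
	/* optional ops: a virtual backend may leave these NULL */
	unsigned int (*allocate_perfmon_ids)(void *data, unsigned int count);
	unsigned int (*release_perfmon_ids)(void *data, unsigned int start,
					    unsigned int count);
};

static unsigned int css_alloc_ids(const struct css_snapshot_ops *ops,
				  void *data, unsigned int count)
{
	/* optional op absent: IDs are managed elsewhere, nothing to do here */
	if (!ops->allocate_perfmon_ids)
		return 0;

	return ops->allocate_perfmon_ids(data, count);
}

A backend fills in only the hooks it implements, and every caller of an optional hook repeats the NULL check, exactly as the guarded calls in the diff do.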
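
The other substantive change is in css_gr_flush_snapshots(): instead of comparing dst_put + 1 against both dst_get and the wrap point, the loop now precomputes dst_nxt, wraps it at the exclusive end of the client buffer, and treats dst_nxt == dst_get as a software overflow, so one slot is always kept free. A standalone sketch of that advance logic, under hypothetical names (struct fifo, fifo_push) rather than the driver's own API:

#include <stdbool.h>

struct entry { unsigned int payload; };

struct fifo {
	struct entry *head;	/* first slot of the ring */
	struct entry *tail;	/* one past the last slot (exclusive end) */
	struct entry *put;	/* next slot the writer fills */
	struct entry *get;	/* next slot the reader drains */
	unsigned int sw_overflows;
};

static bool fifo_push(struct fifo *f, const struct entry *src)
{
	struct entry *nxt = f->put + 1;

	if (nxt == f->tail)		/* wrap at the exclusive end */
		nxt = f->head;

	if (nxt == f->get) {		/* ring full: keep one slot free */
		f->sw_overflows++;
		return false;		/* no data copy, no pointer update */
	}

	*f->put = *src;			/* copy the snapshot entry */
	f->put = nxt;			/* publish the new put position */
	return true;
}

Dropping the entry on overflow rather than overwriting matches the behaviour in the diff: sw_overflow_events_occured is incremented and neither put nor get moves.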