summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorPeter Daifuku <pdaifuku@nvidia.com>2016-08-31 20:04:56 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2016-09-08 19:04:09 -0400
commit9aa7de15c2a644e9c7e9c157e49087e66d4ac3d0 (patch)
treee5080886f09aa75c6a3cc83e5b27f8f7553678a4 /drivers
parent70cad5fbb593602a49f91e57c04d1da0334b3a49 (diff)
gpu: nvgpu: vgpu: cyclestat snapshot support
Add support for cyclestats snapshots in the virtual case Bug 1700143 JIRA EVLR-278 Change-Id: I376a8804d57324f43eb16452d857a3b7bb0ecc90 Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com> Reviewed-on: http://git-master/r/1211547 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/Makefile.nvgpu1
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c6
-rw-r--r--drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c233
-rw-r--r--drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h119
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h18
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h12
-rw-r--r--drivers/gpu/nvgpu/gk20a/hal_gk20a.c4
-rw-r--r--drivers/gpu/nvgpu/gm206/hal_gm206.c4
-rw-r--r--drivers/gpu/nvgpu/gm20b/hal_gm20b.c4
-rw-r--r--drivers/gpu/nvgpu/vgpu/css_vgpu.c221
-rw-r--r--drivers/gpu/nvgpu/vgpu/gr_vgpu.c4
-rw-r--r--drivers/gpu/nvgpu/vgpu/vgpu.c3
-rw-r--r--drivers/gpu/nvgpu/vgpu/vgpu.h8
13 files changed, 494 insertions, 143 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 5ca2f56f..b8e38919 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -105,6 +105,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
105 vgpu/dbg_vgpu.o \ 105 vgpu/dbg_vgpu.o \
106 vgpu/fecs_trace_vgpu.o \ 106 vgpu/fecs_trace_vgpu.o \
107 vgpu/tsg_vgpu.o \ 107 vgpu/tsg_vgpu.o \
108 vgpu/css_vgpu.o \
108 vgpu/gk20a/vgpu_hal_gk20a.o \ 109 vgpu/gk20a/vgpu_hal_gk20a.o \
109 vgpu/gk20a/vgpu_gr_gk20a.o \ 110 vgpu/gk20a/vgpu_gr_gk20a.o \
110 vgpu/gm20b/vgpu_hal_gm20b.o \ 111 vgpu/gm20b/vgpu_hal_gm20b.o \
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 49711af9..d23a8026 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -603,7 +603,7 @@ static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
603 603
604 mutex_lock(&ch->cs_client_mutex); 604 mutex_lock(&ch->cs_client_mutex);
605 if (ch->cs_client) 605 if (ch->cs_client)
606 ret = gr_gk20a_css_flush(ch->g, ch->cs_client); 606 ret = gr_gk20a_css_flush(ch, ch->cs_client);
607 else 607 else
608 ret = -EBADF; 608 ret = -EBADF;
609 mutex_unlock(&ch->cs_client_mutex); 609 mutex_unlock(&ch->cs_client_mutex);
@@ -622,7 +622,7 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
622 if (ch->cs_client) { 622 if (ch->cs_client) {
623 ret = -EEXIST; 623 ret = -EEXIST;
624 } else { 624 } else {
625 ret = gr_gk20a_css_attach(ch->g, 625 ret = gr_gk20a_css_attach(ch,
626 dmabuf_fd, 626 dmabuf_fd,
627 perfmon_id_count, 627 perfmon_id_count,
628 perfmon_id_start, 628 perfmon_id_start,
@@ -639,7 +639,7 @@ static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
639 639
640 mutex_lock(&ch->cs_client_mutex); 640 mutex_lock(&ch->cs_client_mutex);
641 if (ch->cs_client) { 641 if (ch->cs_client) {
642 ret = gr_gk20a_css_detach(ch->g, ch->cs_client); 642 ret = gr_gk20a_css_detach(ch, ch->cs_client);
643 ch->cs_client = NULL; 643 ch->cs_client = NULL;
644 } else { 644 } else {
645 ret = 0; 645 ret = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 62f60761..71614d6e 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -25,93 +25,13 @@
25#include "gk20a.h" 25#include "gk20a.h"
26#include "hw_perf_gk20a.h" 26#include "hw_perf_gk20a.h"
27#include "hw_mc_gk20a.h" 27#include "hw_mc_gk20a.h"
28 28#include "css_gr_gk20a.h"
29
30
31/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
32struct gk20a_cs_snapshot_fifo {
33 /* layout description of the buffer */
34 u32 start;
35 u32 end;
36
37 /* snafu bits */
38 u32 hw_overflow_events_occured;
39 u32 sw_overflow_events_occured;
40
41 /* the kernel copies new entries to put and
42 * increment the put++. if put == get then
43 * overflowEventsOccured++
44 */
45 u32 put;
46 u32 _reserved10;
47 u32 _reserved11;
48 u32 _reserved12;
49
50 /* the driver/client reads from get until
51 * put==get, get++ */
52 u32 get;
53 u32 _reserved20;
54 u32 _reserved21;
55 u32 _reserved22;
56
57 /* unused */
58 u32 _reserved30;
59 u32 _reserved31;
60 u32 _reserved32;
61 u32 _reserved33;
62};
63
64/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
65struct gk20a_cs_snapshot_fifo_entry {
66 /* global 48 timestamp */
67 u32 timestamp31_00:32;
68 u32 timestamp39_32:8;
69
70 /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
71 u32 perfmon_id:8;
72
73 /* typically samples_counter is wired to #pmtrigger count */
74 u32 samples_counter:12;
75
76 /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
77 u32 ds:1;
78 u32 sz:1;
79 u32 zero0:1;
80 u32 zero1:1;
81
82 /* counter results */
83 u32 event_cnt:32;
84 u32 trigger0_cnt:32;
85 u32 trigger1_cnt:32;
86 u32 sample_cnt:32;
87
88 /* Local PmTrigger results for Maxwell+ or padding otherwise */
89 u16 local_trigger_b_count:16;
90 u16 book_mark_b:16;
91 u16 local_trigger_a_count:16;
92 u16 book_mark_a:16;
93};
94
95
96/* cycle stats snapshot client data (e.g. associated with channel) */
97struct gk20a_cs_snapshot_client {
98 struct list_head list;
99 u32 dmabuf_fd;
100 struct dma_buf *dma_handler;
101 struct gk20a_cs_snapshot_fifo *snapshot;
102 u32 snapshot_size;
103 u32 perfmon_start;
104 u32 perfmon_count;
105};
106 29
107/* check client for pointed perfmon ownership */ 30/* check client for pointed perfmon ownership */
108#define CONTAINS_PERFMON(cl, pm) \ 31#define CONTAINS_PERFMON(cl, pm) \
109 ((cl)->perfmon_start <= (pm) && \ 32 ((cl)->perfmon_start <= (pm) && \
110 ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) 33 ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
111 34
112/* the minimal size of HW buffer - should be enough to avoid HW overflows */
113#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
114
115/* the minimal size of client buffer */ 35/* the minimal size of client buffer */
116#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ 36#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
117 (sizeof(struct gk20a_cs_snapshot_fifo) + \ 37 (sizeof(struct gk20a_cs_snapshot_fifo) + \
@@ -131,20 +51,6 @@ struct gk20a_cs_snapshot_client {
131/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ 51/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
132#define CSS_MAX_PERFMON_IDS 256 52#define CSS_MAX_PERFMON_IDS 256
133 53
134/* local definitions to avoid hardcodes sizes and shifts */
135#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
136
137/* cycle stats snapshot control structure for one HW entry and many clients */
138struct gk20a_cs_snapshot {
139 unsigned long perfmon_ids[PM_BITMAP_SIZE];
140 struct list_head clients;
141 struct mem_desc hw_memdesc;
142 /* pointer to allocated cpu_va memory where GPU place data */
143 struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
144 struct gk20a_cs_snapshot_fifo_entry *hw_end;
145 struct gk20a_cs_snapshot_fifo_entry *hw_get;
146};
147
148/* reports whether the hw queue overflowed */ 54/* reports whether the hw queue overflowed */
149static inline bool css_hw_get_overflow_status(struct gk20a *g) 55static inline bool css_hw_get_overflow_status(struct gk20a *g)
150{ 56{
@@ -215,10 +121,13 @@ static int css_gr_create_shared_data(struct gr_gk20a *gr)
215 return 0; 121 return 0;
216} 122}
217 123
218static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size) 124static int css_hw_enable_snapshot(struct channel_gk20a *ch,
125 struct gk20a_cs_snapshot_client *cs_client)
219{ 126{
220 struct gk20a *g = gr->g; 127 struct gk20a *g = ch->g;
128 struct gr_gk20a *gr = &g->gr;
221 struct gk20a_cs_snapshot *data = gr->cs_data; 129 struct gk20a_cs_snapshot *data = gr->cs_data;
130 u32 snapshot_size = cs_client->snapshot_size;
222 int ret; 131 int ret;
223 132
224 u32 virt_addr_lo; 133 u32 virt_addr_lo;
@@ -317,9 +226,11 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr)
317 226
318static void css_gr_free_shared_data(struct gr_gk20a *gr) 227static void css_gr_free_shared_data(struct gr_gk20a *gr)
319{ 228{
229 struct gk20a *g = gr->g;
230
320 if (gr->cs_data) { 231 if (gr->cs_data) {
321 /* the clients list is expected to be empty */ 232 /* the clients list is expected to be empty */
322 css_hw_disable_snapshot(gr); 233 g->ops.css.disable_snapshot(gr);
323 234
324 /* release the objects */ 235 /* release the objects */
325 kfree(gr->cs_data); 236 kfree(gr->cs_data);
@@ -344,12 +255,15 @@ css_gr_search_client(struct list_head *clients, u32 perfmon)
344 return NULL; 255 return NULL;
345} 256}
346 257
347static int css_gr_flush_snapshots(struct gr_gk20a *gr) 258static int css_gr_flush_snapshots(struct channel_gk20a *ch)
348{ 259{
349 struct gk20a *g = gr->g; 260 struct gk20a *g = ch->g;
261 struct gr_gk20a *gr = &g->gr;
350 struct gk20a_cs_snapshot *css = gr->cs_data; 262 struct gk20a_cs_snapshot *css = gr->cs_data;
351 struct gk20a_cs_snapshot_client *cur; 263 struct gk20a_cs_snapshot_client *cur;
352 u32 pending; 264 u32 pending, completed;
265 bool hw_overflow;
266 int err;
353 267
354 /* variables for iterating over HW entries */ 268 /* variables for iterating over HW entries */
355 u32 sid; 269 u32 sid;
@@ -360,24 +274,25 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
360 struct gk20a_cs_snapshot_fifo *dst; 274 struct gk20a_cs_snapshot_fifo *dst;
361 struct gk20a_cs_snapshot_fifo_entry *dst_get; 275 struct gk20a_cs_snapshot_fifo_entry *dst_get;
362 struct gk20a_cs_snapshot_fifo_entry *dst_put; 276 struct gk20a_cs_snapshot_fifo_entry *dst_put;
277 struct gk20a_cs_snapshot_fifo_entry *dst_nxt;
363 struct gk20a_cs_snapshot_fifo_entry *dst_head; 278 struct gk20a_cs_snapshot_fifo_entry *dst_head;
364 struct gk20a_cs_snapshot_fifo_entry *dst_tail; 279 struct gk20a_cs_snapshot_fifo_entry *dst_tail;
365 280
366 if (!css) 281 if (!css)
367 return -EINVAL; 282 return -EINVAL;
368 283
369 if (!css->hw_snapshot)
370 return -EINVAL;
371
372 if (list_empty(&css->clients)) 284 if (list_empty(&css->clients))
373 return -EBADF; 285 return -EBADF;
374 286
375 /* check data available */ 287 /* check data available */
376 pending = css_hw_get_pending_snapshots(g); 288 err = g->ops.css.check_data_available(ch, &pending, &hw_overflow);
289 if (err)
290 return err;
291
377 if (!pending) 292 if (!pending)
378 return 0; 293 return 0;
379 294
380 if (css_hw_get_overflow_status(g)) { 295 if (hw_overflow) {
381 struct list_head *pos; 296 struct list_head *pos;
382 297
383 list_for_each(pos, &css->clients) { 298 list_for_each(pos, &css->clients) {
@@ -387,11 +302,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
387 } 302 }
388 303
389 gk20a_warn(dev_from_gk20a(g), 304 gk20a_warn(dev_from_gk20a(g),
390 "cyclestats: hardware overflow detected\n"); 305 "cyclestats: hardware overflow detected\n");
391 } 306 }
392 307
393 /* proceed all items in HW buffer */ 308 /* process all items in HW buffer */
394 sid = 0; 309 sid = 0;
310 completed = 0;
395 cur = NULL; 311 cur = NULL;
396 dst = NULL; 312 dst = NULL;
397 dst_put = NULL; 313 dst_put = NULL;
@@ -419,7 +335,11 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
419 dst_get = CSS_FIFO_ENTRY(dst, dst->get); 335 dst_get = CSS_FIFO_ENTRY(dst, dst->get);
420 dst_put = CSS_FIFO_ENTRY(dst, dst->put); 336 dst_put = CSS_FIFO_ENTRY(dst, dst->put);
421 dst_head = CSS_FIFO_ENTRY(dst, dst->start); 337 dst_head = CSS_FIFO_ENTRY(dst, dst->start);
422 dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1; 338 dst_tail = CSS_FIFO_ENTRY(dst, dst->end);
339
340 dst_nxt = dst_put + 1;
341 if (dst_nxt == dst_tail)
342 dst_nxt = dst_head;
423 } else { 343 } else {
424 /* client not found - skipping this entry */ 344 /* client not found - skipping this entry */
425 gk20a_warn(dev_from_gk20a(g), 345 gk20a_warn(dev_from_gk20a(g),
@@ -430,8 +350,7 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
430 } 350 }
431 351
432 /* check for software overflows */ 352 /* check for software overflows */
433 if (dst_put + 1 == dst_get || 353 if (dst_nxt == dst_get) {
434 (dst_put == dst_tail && dst_get == dst_head)) {
435 /* no data copy, no pointer updates */ 354 /* no data copy, no pointer updates */
436 dst->sw_overflow_events_occured++; 355 dst->sw_overflow_events_occured++;
437 gk20a_warn(dev_from_gk20a(g), 356 gk20a_warn(dev_from_gk20a(g),
@@ -439,10 +358,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
439 src->perfmon_id); 358 src->perfmon_id);
440 } else { 359 } else {
441 *dst_put = *src; 360 *dst_put = *src;
442 if (dst_put == dst_tail) 361 completed++;
443 dst_put = dst_head; 362
444 else 363 dst_put = dst_nxt++;
445 dst_put++; 364
365 if (dst_nxt == dst_tail)
366 dst_nxt = dst_head;
446 } 367 }
447 368
448next_hw_fifo_entry: 369next_hw_fifo_entry:
@@ -465,14 +386,17 @@ next_hw_fifo_entry:
465 (css->hw_end - css->hw_get) * sizeof(*src)); 386 (css->hw_end - css->hw_get) * sizeof(*src));
466 } 387 }
467 gr->cs_data->hw_get = src; 388 gr->cs_data->hw_get = src;
468 css_hw_set_handled_snapshots(g, sid); 389
469 if (pending != sid) { 390 if (g->ops.css.set_handled_snapshots)
391 g->ops.css.set_handled_snapshots(g, sid);
392
393 if (completed != sid) {
470 /* not all entries proceed correctly. some of problems */ 394 /* not all entries proceed correctly. some of problems */
471 /* reported as overflows, some as orphaned perfmons, */ 395 /* reported as overflows, some as orphaned perfmons, */
472 /* but it will be better notify with summary about it */ 396 /* but it will be better notify with summary about it */
473 gk20a_warn(dev_from_gk20a(g), 397 gk20a_warn(dev_from_gk20a(g),
474 "cyclestats: done %u from %u entries\n", 398 "cyclestats: completed %u from %u entries\n",
475 sid, pending); 399 completed, pending);
476 } 400 }
477 401
478 return 0; 402 return 0;
@@ -511,7 +435,8 @@ static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
511} 435}
512 436
513 437
514static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, 438static int css_gr_free_client_data(struct gk20a *g,
439 struct gk20a_cs_snapshot *data,
515 struct gk20a_cs_snapshot_client *client) 440 struct gk20a_cs_snapshot_client *client)
516{ 441{
517 int ret = 0; 442 int ret = 0;
@@ -519,8 +444,9 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
519 if (client->list.next && client->list.prev) 444 if (client->list.next && client->list.prev)
520 list_del(&client->list); 445 list_del(&client->list);
521 446
522 if (client->perfmon_start && client->perfmon_count) { 447 if (client->perfmon_start && client->perfmon_count
523 if (client->perfmon_count != css_gr_release_perfmon_ids(data, 448 && g->ops.css.release_perfmon_ids) {
449 if (client->perfmon_count != g->ops.css.release_perfmon_ids(data,
524 client->perfmon_start, client->perfmon_count)) 450 client->perfmon_start, client->perfmon_count))
525 ret = -EINVAL; 451 ret = -EINVAL;
526 } 452 }
@@ -536,7 +462,8 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
536 return ret; 462 return ret;
537} 463}
538 464
539static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, 465static int css_gr_create_client_data(struct gk20a *g,
466 struct gk20a_cs_snapshot *data,
540 u32 dmabuf_fd, u32 perfmon_count, 467 u32 dmabuf_fd, u32 perfmon_count,
541 struct gk20a_cs_snapshot_client **client) 468 struct gk20a_cs_snapshot_client **client)
542{ 469{
@@ -581,8 +508,12 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
581 cur->snapshot->put = cur->snapshot->start; 508 cur->snapshot->put = cur->snapshot->start;
582 509
583 cur->perfmon_count = perfmon_count; 510 cur->perfmon_count = perfmon_count;
584 if (cur->perfmon_count) { 511
585 cur->perfmon_start = css_gr_allocate_perfmon_ids(data, 512 /* In virtual case, perfmon ID allocation is handled by the server
513 * at the time of the attach (allocate_perfmon_ids is NULL in this case)
514 */
515 if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) {
516 cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data,
586 cur->perfmon_count); 517 cur->perfmon_count);
587 if (!cur->perfmon_start) { 518 if (!cur->perfmon_start) {
588 ret = -ENOENT; 519 ret = -ENOENT;
@@ -598,19 +529,20 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
598failed: 529failed:
599 *client = NULL; 530 *client = NULL;
600 if (cur) 531 if (cur)
601 css_gr_free_client_data(data, cur); 532 css_gr_free_client_data(g, data, cur);
602 533
603 return ret; 534 return ret;
604} 535}
605 536
606 537
607int gr_gk20a_css_attach(struct gk20a *g, 538int gr_gk20a_css_attach(struct channel_gk20a *ch,
608 u32 dmabuf_fd, 539 u32 dmabuf_fd,
609 u32 perfmon_count, 540 u32 perfmon_count,
610 u32 *perfmon_start, 541 u32 *perfmon_start,
611 struct gk20a_cs_snapshot_client **cs_client) 542 struct gk20a_cs_snapshot_client **cs_client)
612{ 543{
613 int ret = 0; 544 int ret = 0;
545 struct gk20a *g = ch->g;
614 struct gr_gk20a *gr; 546 struct gr_gk20a *gr;
615 547
616 /* we must have a placeholder to store pointer to client structure */ 548 /* we must have a placeholder to store pointer to client structure */
@@ -630,14 +562,14 @@ int gr_gk20a_css_attach(struct gk20a *g,
630 if (ret) 562 if (ret)
631 goto failed; 563 goto failed;
632 564
633 ret = css_gr_create_client_data(gr->cs_data, 565 ret = css_gr_create_client_data(g, gr->cs_data,
634 dmabuf_fd, 566 dmabuf_fd,
635 perfmon_count, 567 perfmon_count,
636 cs_client); 568 cs_client);
637 if (ret) 569 if (ret)
638 goto failed; 570 goto failed;
639 571
640 ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size); 572 ret = g->ops.css.enable_snapshot(ch, *cs_client);
641 if (ret) 573 if (ret)
642 goto failed; 574 goto failed;
643 575
@@ -651,7 +583,7 @@ int gr_gk20a_css_attach(struct gk20a *g,
651failed: 583failed:
652 if (gr->cs_data) { 584 if (gr->cs_data) {
653 if (*cs_client) { 585 if (*cs_client) {
654 css_gr_free_client_data(gr->cs_data, *cs_client); 586 css_gr_free_client_data(g, gr->cs_data, *cs_client);
655 *cs_client = NULL; 587 *cs_client = NULL;
656 } 588 }
657 589
@@ -666,10 +598,11 @@ failed:
666 return ret; 598 return ret;
667} 599}
668 600
669int gr_gk20a_css_detach(struct gk20a *g, 601int gr_gk20a_css_detach(struct channel_gk20a *ch,
670 struct gk20a_cs_snapshot_client *cs_client) 602 struct gk20a_cs_snapshot_client *cs_client)
671{ 603{
672 int ret = 0; 604 int ret = 0;
605 struct gk20a *g = ch->g;
673 struct gr_gk20a *gr; 606 struct gr_gk20a *gr;
674 607
675 if (!cs_client) 608 if (!cs_client)
@@ -680,7 +613,10 @@ int gr_gk20a_css_detach(struct gk20a *g,
680 if (gr->cs_data) { 613 if (gr->cs_data) {
681 struct gk20a_cs_snapshot *data = gr->cs_data; 614 struct gk20a_cs_snapshot *data = gr->cs_data;
682 615
683 ret = css_gr_free_client_data(data, cs_client); 616 if (g->ops.css.detach_snapshot)
617 g->ops.css.detach_snapshot(ch, cs_client);
618
619 ret = css_gr_free_client_data(g, data, cs_client);
684 if (list_empty(&data->clients)) 620 if (list_empty(&data->clients))
685 css_gr_free_shared_data(gr); 621 css_gr_free_shared_data(gr);
686 } else { 622 } else {
@@ -691,10 +627,11 @@ int gr_gk20a_css_detach(struct gk20a *g,
691 return ret; 627 return ret;
692} 628}
693 629
694int gr_gk20a_css_flush(struct gk20a *g, 630int gr_gk20a_css_flush(struct channel_gk20a *ch,
695 struct gk20a_cs_snapshot_client *cs_client) 631 struct gk20a_cs_snapshot_client *cs_client)
696{ 632{
697 int ret = 0; 633 int ret = 0;
634 struct gk20a *g = ch->g;
698 struct gr_gk20a *gr; 635 struct gr_gk20a *gr;
699 636
700 if (!cs_client) 637 if (!cs_client)
@@ -702,7 +639,7 @@ int gr_gk20a_css_flush(struct gk20a *g,
702 639
703 gr = &g->gr; 640 gr = &g->gr;
704 mutex_lock(&gr->cs_lock); 641 mutex_lock(&gr->cs_lock);
705 ret = css_gr_flush_snapshots(gr); 642 ret = css_gr_flush_snapshots(ch);
706 mutex_unlock(&gr->cs_lock); 643 mutex_unlock(&gr->cs_lock);
707 644
708 return ret; 645 return ret;
@@ -718,3 +655,31 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
718 mutex_unlock(&gr->cs_lock); 655 mutex_unlock(&gr->cs_lock);
719 mutex_destroy(&gr->cs_lock); 656 mutex_destroy(&gr->cs_lock);
720} 657}
658
659static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
660 bool *hw_overflow)
661{
662 struct gk20a *g = ch->g;
663 struct gr_gk20a *gr = &g->gr;
664 struct gk20a_cs_snapshot *css = gr->cs_data;
665
666 if (!css->hw_snapshot)
667 return -EINVAL;
668
669 *pending = css_hw_get_pending_snapshots(g);
670 if (!*pending)
671 return 0;
672
673 *hw_overflow = css_hw_get_overflow_status(g);
674 return 0;
675}
676
677void gk20a_init_css_ops(struct gpu_ops *gops)
678{
679 gops->css.enable_snapshot = css_hw_enable_snapshot;
680 gops->css.disable_snapshot = css_hw_disable_snapshot;
681 gops->css.check_data_available = css_hw_check_data_available;
682 gops->css.set_handled_snapshots = css_hw_set_handled_snapshots;
683 gops->css.allocate_perfmon_ids = css_gr_allocate_perfmon_ids;
684 gops->css.release_perfmon_ids = css_gr_release_perfmon_ids;
685}
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
new file mode 100644
index 00000000..be638abf
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
@@ -0,0 +1,119 @@
1/*
2 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
3 *
4 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef CSS_GR_GK20A_H
20#define CSS_GR_GK20A_H
21
22/* the minimal size of HW buffer - should be enough to avoid HW overflows */
23#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
24
25/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
26struct gk20a_cs_snapshot_fifo {
27 /* layout description of the buffer */
28 u32 start;
29 u32 end;
30
31 /* snafu bits */
32 u32 hw_overflow_events_occured;
33 u32 sw_overflow_events_occured;
34
35 /* the kernel copies new entries to put and
36 * increment the put++. if put == get then
37 * overflowEventsOccured++
38 */
39 u32 put;
40 u32 _reserved10;
41 u32 _reserved11;
42 u32 _reserved12;
43
44 /* the driver/client reads from get until
45 * put==get, get++ */
46 u32 get;
47 u32 _reserved20;
48 u32 _reserved21;
49 u32 _reserved22;
50
51 /* unused */
52 u32 _reserved30;
53 u32 _reserved31;
54 u32 _reserved32;
55 u32 _reserved33;
56};
57
58/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
59struct gk20a_cs_snapshot_fifo_entry {
60 /* global 48 timestamp */
61 u32 timestamp31_00:32;
62 u32 timestamp39_32:8;
63
64 /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
65 u32 perfmon_id:8;
66
67 /* typically samples_counter is wired to #pmtrigger count */
68 u32 samples_counter:12;
69
70 /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
71 u32 ds:1;
72 u32 sz:1;
73 u32 zero0:1;
74 u32 zero1:1;
75
76 /* counter results */
77 u32 event_cnt:32;
78 u32 trigger0_cnt:32;
79 u32 trigger1_cnt:32;
80 u32 sample_cnt:32;
81
82 /* Local PmTrigger results for Maxwell+ or padding otherwise */
83 u16 local_trigger_b_count:16;
84 u16 book_mark_b:16;
85 u16 local_trigger_a_count:16;
86 u16 book_mark_a:16;
87};
88
89/* cycle stats snapshot client data (e.g. associated with channel) */
90struct gk20a_cs_snapshot_client {
91 struct list_head list;
92 u32 dmabuf_fd;
93 struct dma_buf *dma_handler;
94 struct gk20a_cs_snapshot_fifo *snapshot;
95 u32 snapshot_size;
96 u32 perfmon_start;
97 u32 perfmon_count;
98};
99
100/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
101#define CSS_MAX_PERFMON_IDS 256
102
103/* local definitions to avoid hardcodes sizes and shifts */
104#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
105
106/* cycle stats snapshot control structure for one HW entry and many clients */
107struct gk20a_cs_snapshot {
108 unsigned long perfmon_ids[PM_BITMAP_SIZE];
109 struct list_head clients;
110 struct mem_desc hw_memdesc;
111 /* pointer to allocated cpu_va memory where GPU place data */
112 struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
113 struct gk20a_cs_snapshot_fifo_entry *hw_end;
114 struct gk20a_cs_snapshot_fifo_entry *hw_get;
115};
116
117void gk20a_init_css_ops(struct gpu_ops *gops);
118
119#endif /* CSS_GR_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 669ef1b9..1ca8ff77 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -657,6 +657,24 @@ struct gpu_ops {
657 struct { 657 struct {
658 int (*init)(struct gk20a *g); 658 int (*init)(struct gk20a *g);
659 } bios; 659 } bios;
660#if defined(CONFIG_GK20A_CYCLE_STATS)
661 struct {
662 int (*enable_snapshot)(struct channel_gk20a *ch,
663 struct gk20a_cs_snapshot_client *client);
664 void (*disable_snapshot)(struct gr_gk20a *gr);
665 int (*check_data_available)(struct channel_gk20a *ch,
666 u32 *pending,
667 bool *hw_overflow);
668 void (*set_handled_snapshots)(struct gk20a *g, u32 num);
669 u32 (*allocate_perfmon_ids)(struct gk20a_cs_snapshot *data,
670 u32 count);
671 u32 (*release_perfmon_ids)(struct gk20a_cs_snapshot *data,
672 u32 start,
673 u32 count);
674 int (*detach_snapshot)(struct channel_gk20a *ch,
675 struct gk20a_cs_snapshot_client *client);
676 } css;
677#endif
660}; 678};
661 679
662struct nvgpu_bios_ucode { 680struct nvgpu_bios_ucode {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 2a351bc3..c337a74a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -603,16 +603,16 @@ int gr_gk20a_halt_pipe(struct gk20a *g);
603int gr_gk20a_debugfs_init(struct gk20a *g); 603int gr_gk20a_debugfs_init(struct gk20a *g);
604 604
605#if defined(CONFIG_GK20A_CYCLE_STATS) 605#if defined(CONFIG_GK20A_CYCLE_STATS)
606int gr_gk20a_css_attach(struct gk20a *g, /* in - main hw structure */ 606int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
607 u32 dmabuf_fd, /* in - dma mapped memory */ 607 u32 dmabuf_fd, /* in - dma mapped memory */
608 u32 perfmon_id_count, /* in - number of perfmons*/ 608 u32 perfmon_id_count, /* in - number of perfmons*/
609 u32 *perfmon_id_start, /* out- index of first pm */ 609 u32 *perfmon_id_start, /* out- index of first pm */
610 /* out - pointer to client data used in later */ 610 /* out - pointer to client data used in later */
611 struct gk20a_cs_snapshot_client **css_client); 611 struct gk20a_cs_snapshot_client **css_client);
612 612
613int gr_gk20a_css_detach(struct gk20a *g, 613int gr_gk20a_css_detach(struct channel_gk20a *ch,
614 struct gk20a_cs_snapshot_client *css_client); 614 struct gk20a_cs_snapshot_client *css_client);
615int gr_gk20a_css_flush(struct gk20a *g, 615int gr_gk20a_css_flush(struct channel_gk20a *ch,
616 struct gk20a_cs_snapshot_client *css_client); 616 struct gk20a_cs_snapshot_client *css_client);
617 617
618void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g); 618void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index 4da7ffad..550dffa6 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -32,6 +32,7 @@
32#include "hw_proj_gk20a.h" 32#include "hw_proj_gk20a.h"
33#include "tsg_gk20a.h" 33#include "tsg_gk20a.h"
34#include "dbg_gpu_gk20a.h" 34#include "dbg_gpu_gk20a.h"
35#include "css_gr_gk20a.h"
35 36
36static struct gpu_ops gk20a_ops = { 37static struct gpu_ops gk20a_ops = {
37 .clock_gating = { 38 .clock_gating = {
@@ -157,6 +158,9 @@ int gk20a_init_hal(struct gk20a *g)
157 gk20a_init_dbg_session_ops(gops); 158 gk20a_init_dbg_session_ops(gops);
158 gk20a_init_therm_ops(gops); 159 gk20a_init_therm_ops(gops);
159 gk20a_init_tsg_ops(gops); 160 gk20a_init_tsg_ops(gops);
161#if defined(CONFIG_GK20A_CYCLE_STATS)
162 gk20a_init_css_ops(gops);
163#endif
160 gops->name = "gk20a"; 164 gops->name = "gk20a";
161 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; 165 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
162 gops->get_litter_value = gk20a_get_litter_value; 166 gops->get_litter_value = gk20a_get_litter_value;
diff --git a/drivers/gpu/nvgpu/gm206/hal_gm206.c b/drivers/gpu/nvgpu/gm206/hal_gm206.c
index 3c6897ea..6b43c8e9 100644
--- a/drivers/gpu/nvgpu/gm206/hal_gm206.c
+++ b/drivers/gpu/nvgpu/gm206/hal_gm206.c
@@ -41,6 +41,7 @@
41#include "gr_gm206.h" 41#include "gr_gm206.h"
42#include "hw_proj_gm206.h" 42#include "hw_proj_gm206.h"
43#include "gk20a/dbg_gpu_gk20a.h" 43#include "gk20a/dbg_gpu_gk20a.h"
44#include "gk20a/css_gr_gk20a.h"
44 45
45static struct gpu_ops gm206_ops = { 46static struct gpu_ops gm206_ops = {
46 .clock_gating = { 47 .clock_gating = {
@@ -199,6 +200,9 @@ int gm206_init_hal(struct gk20a *g)
199 gm20b_init_cde_ops(gops); 200 gm20b_init_cde_ops(gops);
200 gm20b_init_therm_ops(gops); 201 gm20b_init_therm_ops(gops);
201 gk20a_init_tsg_ops(gops); 202 gk20a_init_tsg_ops(gops);
203#if defined(CONFIG_GK20A_CYCLE_STATS)
204 gk20a_init_css_ops(gops);
205#endif
202 gm206_init_bios(gops); 206 gm206_init_bios(gops);
203 switch(ver){ 207 switch(ver){
204 case GK20A_GPUID_GM206: 208 case GK20A_GPUID_GM206:
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 34e3b605..e30ca96f 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -36,6 +36,7 @@
36#include "therm_gm20b.h" 36#include "therm_gm20b.h"
37#include "hw_proj_gm20b.h" 37#include "hw_proj_gm20b.h"
38#include "gk20a/dbg_gpu_gk20a.h" 38#include "gk20a/dbg_gpu_gk20a.h"
39#include "gk20a/css_gr_gk20a.h"
39 40
40#define FUSE_OPT_PRIV_SEC_DIS_0 0x264 41#define FUSE_OPT_PRIV_SEC_DIS_0 0x264
41#define PRIV_SECURITY_DISABLE 0x01 42#define PRIV_SECURITY_DISABLE 0x01
@@ -226,6 +227,9 @@ int gm20b_init_hal(struct gk20a *g)
226 gm20b_init_cde_ops(gops); 227 gm20b_init_cde_ops(gops);
227 gm20b_init_therm_ops(gops); 228 gm20b_init_therm_ops(gops);
228 gk20a_init_tsg_ops(gops); 229 gk20a_init_tsg_ops(gops);
230#if defined(CONFIG_GK20A_CYCLE_STATS)
231 gk20a_init_css_ops(gops);
232#endif
229 gops->name = "gm20b"; 233 gops->name = "gm20b";
230 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; 234 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
231 gops->get_litter_value = gm20b_get_litter_value; 235 gops->get_litter_value = gm20b_get_litter_value;
diff --git a/drivers/gpu/nvgpu/vgpu/css_vgpu.c b/drivers/gpu/nvgpu/vgpu/css_vgpu.c
new file mode 100644
index 00000000..486d3e88
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/css_vgpu.c
@@ -0,0 +1,221 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#if defined(CONFIG_GK20A_CYCLE_STATS)
17
18#include <linux/kernel.h>
19#include <linux/nvhost.h>
20#include <linux/tegra-ivc.h>
21#include <linux/tegra_vgpu.h>
22
23#include "gk20a/gk20a.h"
24#include "gk20a/channel_gk20a.h"
25#include "gk20a/platform_gk20a.h"
26#include "gk20a/css_gr_gk20a.h"
27#include "vgpu.h"
28
/*
 * Guest-side bookkeeping for the shared hardware snapshot FIFO.
 *
 * NOTE(review): this struct is not referenced anywhere in this file;
 * confirm it is actually needed (e.g. by a follow-up change) or drop it.
 */
struct vgpu_hw_snapshot_buffer {
	/* IVM mempool reservation backing the buffer */
	struct tegra_hv_ivm_cookie *cookie;
	/* mapped base address of the shared buffer */
	void *buf;
	/* FIFO entry bounds and get/put cursors into the mapped buffer */
	struct gk20a_cs_snapshot_fifo_entry *end;
	struct gk20a_cs_snapshot_fifo_entry *src_get;
	struct gk20a_cs_snapshot_fifo_entry *src_put;
};
36
37struct tegra_hv_ivm_cookie *css_cookie;
38
39int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
40{
41 struct gk20a *g = gr->g;
42 struct device *dev = g->dev;
43 struct gk20a_cs_snapshot *data = gr->cs_data;
44 struct device_node *np = dev->of_node;
45 struct of_phandle_args args;
46 struct device_node *hv_np;
47 void *buf = NULL;
48 u32 mempool;
49 int err;
50
51 gk20a_dbg_fn("");
52
53 if (data->hw_snapshot)
54 return 0;
55
56 err = of_parse_phandle_with_fixed_args(np,
57 "mempool-css", 1, 0, &args);
58 if (err) {
59 dev_info(dev_from_gk20a(g), "dt missing mempool-css\n");
60 goto fail;
61 }
62
63 hv_np = args.np;
64 mempool = args.args[0];
65 css_cookie = tegra_hv_mempool_reserve(hv_np, mempool);
66 if (IS_ERR(css_cookie)) {
67 dev_info(dev_from_gk20a(g),
68 "mempool %u reserve failed\n", mempool);
69 err = -EINVAL;
70 goto fail;
71 }
72
73 /* Make sure buffer size is large enough */
74 if (css_cookie->size < CSS_MIN_HW_SNAPSHOT_SIZE) {
75 dev_info(dev_from_gk20a(g), "mempool size %lld too small\n",
76 css_cookie->size);
77 err = -ENOMEM;
78 goto fail;
79 }
80
81#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
82 buf = ioremap_cached(css_cookie->ipa, css_cookie->size);
83#else
84 buf = ioremap_cache(css_cookie->ipa, css_cookie->size);
85#endif
86 if (!buf) {
87 dev_info(dev_from_gk20a(g), "ioremap_cache failed\n");
88 err = -EINVAL;
89 goto fail;
90 }
91
92 data->hw_snapshot = buf;
93 data->hw_end = data->hw_snapshot +
94 css_cookie->size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
95 data->hw_get = data->hw_snapshot;
96 memset(data->hw_snapshot, 0xff, css_cookie->size);
97 return 0;
98fail:
99 if (!IS_ERR_OR_NULL(css_cookie))
100 tegra_hv_mempool_unreserve(css_cookie);
101 return err;
102}
103
104static void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr)
105{
106 struct gk20a_cs_snapshot *data = gr->cs_data;
107
108 if (!data->hw_snapshot)
109 return;
110
111 iounmap(data->hw_snapshot);
112 data->hw_snapshot = NULL;
113
114 tegra_hv_mempool_unreserve(css_cookie);
115
116 gk20a_dbg_info("cyclestats(vgpu): buffer for snapshots released\n");
117}
118
119static int vgpu_css_flush_snapshots(struct channel_gk20a *ch,
120 u32 *pending, bool *hw_overflow)
121{
122 struct gk20a *g = ch->g;
123 struct tegra_vgpu_cmd_msg msg = {};
124 struct tegra_vgpu_channel_cyclestats_snapshot_params *p;
125 struct gr_gk20a *gr = &g->gr;
126 struct gk20a_cs_snapshot *data = gr->cs_data;
127 int err;
128
129 gk20a_dbg_fn("");
130
131 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT;
132 msg.handle = vgpu_get_handle(g);
133 p = &msg.params.cyclestats_snapshot;
134 p->handle = ch->virt_ctx;
135 p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH;
136 p->buf_info = (uintptr_t)data->hw_get - (uintptr_t)data->hw_snapshot;
137
138 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
139
140 err = (err || msg.ret) ? -1 : 0;
141
142 *pending = p->buf_info;
143 *hw_overflow = p->hw_overflow;
144
145 return err;
146}
147
148static int vgpu_css_attach(struct channel_gk20a *ch,
149 struct gk20a_cs_snapshot_client *cs_client)
150{
151 struct gk20a *g = ch->g;
152 struct tegra_vgpu_cmd_msg msg = {};
153 struct tegra_vgpu_channel_cyclestats_snapshot_params *p =
154 &msg.params.cyclestats_snapshot;
155 int err;
156
157 gk20a_dbg_fn("");
158
159 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT;
160 msg.handle = vgpu_get_handle(g);
161 p->handle = ch->virt_ctx;
162 p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH;
163 p->perfmon_count = cs_client->perfmon_count;
164
165 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
166 err = err ? err : msg.ret;
167 if (err)
168 gk20a_err(dev_from_gk20a(g), "%s failed", __func__);
169 else
170 cs_client->perfmon_start = p->perfmon_start;
171
172 return err;
173}
174
175static int vgpu_css_detach(struct channel_gk20a *ch,
176 struct gk20a_cs_snapshot_client *cs_client)
177{
178 struct gk20a *g = ch->g;
179 struct tegra_vgpu_cmd_msg msg = {};
180 struct tegra_vgpu_channel_cyclestats_snapshot_params *p =
181 &msg.params.cyclestats_snapshot;
182 int err;
183
184 gk20a_dbg_fn("");
185
186 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT;
187 msg.handle = vgpu_get_handle(g);
188 p->handle = ch->virt_ctx;
189 p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH;
190 p->perfmon_start = cs_client->perfmon_start;
191 p->perfmon_count = cs_client->perfmon_count;
192
193 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
194 err = err ? err : msg.ret;
195 if (err)
196 gk20a_err(dev_from_gk20a(g), "%s failed", __func__);
197
198 return err;
199}
200
201static int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch,
202 struct gk20a_cs_snapshot_client *cs_client)
203{
204 int ret;
205
206 ret = vgpu_css_attach(ch, cs_client);
207 if (ret)
208 return ret;
209
210 ret = vgpu_css_init_snapshot_buffer(&ch->g->gr);
211 return ret;
212}
213
214void vgpu_init_css_ops(struct gpu_ops *gops)
215{
216 gops->css.enable_snapshot = vgpu_css_enable_snapshot_buffer;
217 gops->css.disable_snapshot = vgpu_css_release_snapshot_buffer;
218 gops->css.check_data_available = vgpu_css_flush_snapshots;
219 gops->css.detach_snapshot = vgpu_css_detach;
220}
221#endif /* CONFIG_GK20A_CYCLE_STATS */
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 783b2f55..89223091 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -875,6 +875,10 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
875 875
876 gr->g = g; 876 gr->g = g;
877 877
878#if defined(CONFIG_GK20A_CYCLE_STATS)
879 mutex_init(&g->gr.cs_lock);
880#endif
881
878 err = vgpu_gr_init_gr_config(g, gr); 882 err = vgpu_gr_init_gr_config(g, gr);
879 if (err) 883 if (err)
880 goto clean_up; 884 goto clean_up;
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 27d98eb8..bd332583 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -334,6 +334,9 @@ void vgpu_init_hal_common(struct gk20a *g)
334 vgpu_init_dbg_session_ops(gops); 334 vgpu_init_dbg_session_ops(gops);
335 vgpu_init_fecs_trace_ops(gops); 335 vgpu_init_fecs_trace_ops(gops);
336 vgpu_init_tsg_ops(gops); 336 vgpu_init_tsg_ops(gops);
337#if defined(CONFIG_GK20A_CYCLE_STATS)
338 vgpu_init_css_ops(gops);
339#endif
337 gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics; 340 gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
338 gops->read_ptimer = vgpu_read_ptimer; 341 gops->read_ptimer = vgpu_read_ptimer;
339} 342}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h
index 6f1059b8..4a7a6b6c 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.h
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.h
@@ -85,6 +85,9 @@ void vgpu_init_ltc_ops(struct gpu_ops *gops);
85void vgpu_init_mm_ops(struct gpu_ops *gops); 85void vgpu_init_mm_ops(struct gpu_ops *gops);
86void vgpu_init_debug_ops(struct gpu_ops *gops); 86void vgpu_init_debug_ops(struct gpu_ops *gops);
87void vgpu_init_tsg_ops(struct gpu_ops *gops); 87void vgpu_init_tsg_ops(struct gpu_ops *gops);
88#if defined(CONFIG_GK20A_CYCLE_STATS)
89void vgpu_init_css_ops(struct gpu_ops *gops);
90#endif
88int vgpu_init_mm_support(struct gk20a *g); 91int vgpu_init_mm_support(struct gk20a *g);
89int vgpu_init_gr_support(struct gk20a *g); 92int vgpu_init_gr_support(struct gk20a *g);
90int vgpu_init_fifo_support(struct gk20a *g); 93int vgpu_init_fifo_support(struct gk20a *g);
@@ -161,6 +164,11 @@ static inline void vgpu_init_mm_ops(struct gpu_ops *gops)
161static inline void vgpu_init_debug_ops(struct gpu_ops *gops) 164static inline void vgpu_init_debug_ops(struct gpu_ops *gops)
162{ 165{
163} 166}
167#if defined(CONFIG_GK20A_CYCLE_STATS)
168static inline void vgpu_init_css_ops(struct gpu_ops *gops)
169{
170}
171#endif
164static inline int vgpu_init_mm_support(struct gk20a *g) 172static inline int vgpu_init_mm_support(struct gk20a *g)
165{ 173{
166 return -ENOSYS; 174 return -ENOSYS;