diff options
author | Peter Daifuku <pdaifuku@nvidia.com> | 2016-08-31 20:04:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-08 19:04:09 -0400 |
commit | 9aa7de15c2a644e9c7e9c157e49087e66d4ac3d0 (patch) | |
tree | e5080886f09aa75c6a3cc83e5b27f8f7553678a4 /drivers/gpu/nvgpu/gk20a | |
parent | 70cad5fbb593602a49f91e57c04d1da0334b3a49 (diff) |
gpu: nvgpu: vgpu: cyclestat snapshot support
Add support for cyclestats snapshots in the virtualized GPU (vgpu) case.
Bug 1700143
JIRA EVLR-278
Change-Id: I376a8804d57324f43eb16452d857a3b7bb0ecc90
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1211547
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 233 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | 119 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 4 |
6 files changed, 249 insertions, 143 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 49711af9..d23a8026 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -603,7 +603,7 @@ static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) | |||
603 | 603 | ||
604 | mutex_lock(&ch->cs_client_mutex); | 604 | mutex_lock(&ch->cs_client_mutex); |
605 | if (ch->cs_client) | 605 | if (ch->cs_client) |
606 | ret = gr_gk20a_css_flush(ch->g, ch->cs_client); | 606 | ret = gr_gk20a_css_flush(ch, ch->cs_client); |
607 | else | 607 | else |
608 | ret = -EBADF; | 608 | ret = -EBADF; |
609 | mutex_unlock(&ch->cs_client_mutex); | 609 | mutex_unlock(&ch->cs_client_mutex); |
@@ -622,7 +622,7 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, | |||
622 | if (ch->cs_client) { | 622 | if (ch->cs_client) { |
623 | ret = -EEXIST; | 623 | ret = -EEXIST; |
624 | } else { | 624 | } else { |
625 | ret = gr_gk20a_css_attach(ch->g, | 625 | ret = gr_gk20a_css_attach(ch, |
626 | dmabuf_fd, | 626 | dmabuf_fd, |
627 | perfmon_id_count, | 627 | perfmon_id_count, |
628 | perfmon_id_start, | 628 | perfmon_id_start, |
@@ -639,7 +639,7 @@ static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch) | |||
639 | 639 | ||
640 | mutex_lock(&ch->cs_client_mutex); | 640 | mutex_lock(&ch->cs_client_mutex); |
641 | if (ch->cs_client) { | 641 | if (ch->cs_client) { |
642 | ret = gr_gk20a_css_detach(ch->g, ch->cs_client); | 642 | ret = gr_gk20a_css_detach(ch, ch->cs_client); |
643 | ch->cs_client = NULL; | 643 | ch->cs_client = NULL; |
644 | } else { | 644 | } else { |
645 | ret = 0; | 645 | ret = 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 62f60761..71614d6e 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | |||
@@ -25,93 +25,13 @@ | |||
25 | #include "gk20a.h" | 25 | #include "gk20a.h" |
26 | #include "hw_perf_gk20a.h" | 26 | #include "hw_perf_gk20a.h" |
27 | #include "hw_mc_gk20a.h" | 27 | #include "hw_mc_gk20a.h" |
28 | 28 | #include "css_gr_gk20a.h" | |
29 | |||
30 | |||
31 | /* cycle stats fifo header (must match NvSnapshotBufferFifo) */ | ||
32 | struct gk20a_cs_snapshot_fifo { | ||
33 | /* layout description of the buffer */ | ||
34 | u32 start; | ||
35 | u32 end; | ||
36 | |||
37 | /* snafu bits */ | ||
38 | u32 hw_overflow_events_occured; | ||
39 | u32 sw_overflow_events_occured; | ||
40 | |||
41 | /* the kernel copies new entries to put and | ||
42 | * increment the put++. if put == get then | ||
43 | * overflowEventsOccured++ | ||
44 | */ | ||
45 | u32 put; | ||
46 | u32 _reserved10; | ||
47 | u32 _reserved11; | ||
48 | u32 _reserved12; | ||
49 | |||
50 | /* the driver/client reads from get until | ||
51 | * put==get, get++ */ | ||
52 | u32 get; | ||
53 | u32 _reserved20; | ||
54 | u32 _reserved21; | ||
55 | u32 _reserved22; | ||
56 | |||
57 | /* unused */ | ||
58 | u32 _reserved30; | ||
59 | u32 _reserved31; | ||
60 | u32 _reserved32; | ||
61 | u32 _reserved33; | ||
62 | }; | ||
63 | |||
64 | /* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */ | ||
65 | struct gk20a_cs_snapshot_fifo_entry { | ||
66 | /* global 48 timestamp */ | ||
67 | u32 timestamp31_00:32; | ||
68 | u32 timestamp39_32:8; | ||
69 | |||
70 | /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */ | ||
71 | u32 perfmon_id:8; | ||
72 | |||
73 | /* typically samples_counter is wired to #pmtrigger count */ | ||
74 | u32 samples_counter:12; | ||
75 | |||
76 | /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */ | ||
77 | u32 ds:1; | ||
78 | u32 sz:1; | ||
79 | u32 zero0:1; | ||
80 | u32 zero1:1; | ||
81 | |||
82 | /* counter results */ | ||
83 | u32 event_cnt:32; | ||
84 | u32 trigger0_cnt:32; | ||
85 | u32 trigger1_cnt:32; | ||
86 | u32 sample_cnt:32; | ||
87 | |||
88 | /* Local PmTrigger results for Maxwell+ or padding otherwise */ | ||
89 | u16 local_trigger_b_count:16; | ||
90 | u16 book_mark_b:16; | ||
91 | u16 local_trigger_a_count:16; | ||
92 | u16 book_mark_a:16; | ||
93 | }; | ||
94 | |||
95 | |||
96 | /* cycle stats snapshot client data (e.g. associated with channel) */ | ||
97 | struct gk20a_cs_snapshot_client { | ||
98 | struct list_head list; | ||
99 | u32 dmabuf_fd; | ||
100 | struct dma_buf *dma_handler; | ||
101 | struct gk20a_cs_snapshot_fifo *snapshot; | ||
102 | u32 snapshot_size; | ||
103 | u32 perfmon_start; | ||
104 | u32 perfmon_count; | ||
105 | }; | ||
106 | 29 | ||
107 | /* check client for pointed perfmon ownership */ | 30 | /* check client for pointed perfmon ownership */ |
108 | #define CONTAINS_PERFMON(cl, pm) \ | 31 | #define CONTAINS_PERFMON(cl, pm) \ |
109 | ((cl)->perfmon_start <= (pm) && \ | 32 | ((cl)->perfmon_start <= (pm) && \ |
110 | ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) | 33 | ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) |
111 | 34 | ||
112 | /* the minimal size of HW buffer - should be enough to avoid HW overflows */ | ||
113 | #define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) | ||
114 | |||
115 | /* the minimal size of client buffer */ | 35 | /* the minimal size of client buffer */ |
116 | #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ | 36 | #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ |
117 | (sizeof(struct gk20a_cs_snapshot_fifo) + \ | 37 | (sizeof(struct gk20a_cs_snapshot_fifo) + \ |
@@ -131,20 +51,6 @@ struct gk20a_cs_snapshot_client { | |||
131 | /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ | 51 | /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ |
132 | #define CSS_MAX_PERFMON_IDS 256 | 52 | #define CSS_MAX_PERFMON_IDS 256 |
133 | 53 | ||
134 | /* local definitions to avoid hardcodes sizes and shifts */ | ||
135 | #define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG) | ||
136 | |||
137 | /* cycle stats snapshot control structure for one HW entry and many clients */ | ||
138 | struct gk20a_cs_snapshot { | ||
139 | unsigned long perfmon_ids[PM_BITMAP_SIZE]; | ||
140 | struct list_head clients; | ||
141 | struct mem_desc hw_memdesc; | ||
142 | /* pointer to allocated cpu_va memory where GPU place data */ | ||
143 | struct gk20a_cs_snapshot_fifo_entry *hw_snapshot; | ||
144 | struct gk20a_cs_snapshot_fifo_entry *hw_end; | ||
145 | struct gk20a_cs_snapshot_fifo_entry *hw_get; | ||
146 | }; | ||
147 | |||
148 | /* reports whether the hw queue overflowed */ | 54 | /* reports whether the hw queue overflowed */ |
149 | static inline bool css_hw_get_overflow_status(struct gk20a *g) | 55 | static inline bool css_hw_get_overflow_status(struct gk20a *g) |
150 | { | 56 | { |
@@ -215,10 +121,13 @@ static int css_gr_create_shared_data(struct gr_gk20a *gr) | |||
215 | return 0; | 121 | return 0; |
216 | } | 122 | } |
217 | 123 | ||
218 | static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size) | 124 | static int css_hw_enable_snapshot(struct channel_gk20a *ch, |
125 | struct gk20a_cs_snapshot_client *cs_client) | ||
219 | { | 126 | { |
220 | struct gk20a *g = gr->g; | 127 | struct gk20a *g = ch->g; |
128 | struct gr_gk20a *gr = &g->gr; | ||
221 | struct gk20a_cs_snapshot *data = gr->cs_data; | 129 | struct gk20a_cs_snapshot *data = gr->cs_data; |
130 | u32 snapshot_size = cs_client->snapshot_size; | ||
222 | int ret; | 131 | int ret; |
223 | 132 | ||
224 | u32 virt_addr_lo; | 133 | u32 virt_addr_lo; |
@@ -317,9 +226,11 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr) | |||
317 | 226 | ||
318 | static void css_gr_free_shared_data(struct gr_gk20a *gr) | 227 | static void css_gr_free_shared_data(struct gr_gk20a *gr) |
319 | { | 228 | { |
229 | struct gk20a *g = gr->g; | ||
230 | |||
320 | if (gr->cs_data) { | 231 | if (gr->cs_data) { |
321 | /* the clients list is expected to be empty */ | 232 | /* the clients list is expected to be empty */ |
322 | css_hw_disable_snapshot(gr); | 233 | g->ops.css.disable_snapshot(gr); |
323 | 234 | ||
324 | /* release the objects */ | 235 | /* release the objects */ |
325 | kfree(gr->cs_data); | 236 | kfree(gr->cs_data); |
@@ -344,12 +255,15 @@ css_gr_search_client(struct list_head *clients, u32 perfmon) | |||
344 | return NULL; | 255 | return NULL; |
345 | } | 256 | } |
346 | 257 | ||
347 | static int css_gr_flush_snapshots(struct gr_gk20a *gr) | 258 | static int css_gr_flush_snapshots(struct channel_gk20a *ch) |
348 | { | 259 | { |
349 | struct gk20a *g = gr->g; | 260 | struct gk20a *g = ch->g; |
261 | struct gr_gk20a *gr = &g->gr; | ||
350 | struct gk20a_cs_snapshot *css = gr->cs_data; | 262 | struct gk20a_cs_snapshot *css = gr->cs_data; |
351 | struct gk20a_cs_snapshot_client *cur; | 263 | struct gk20a_cs_snapshot_client *cur; |
352 | u32 pending; | 264 | u32 pending, completed; |
265 | bool hw_overflow; | ||
266 | int err; | ||
353 | 267 | ||
354 | /* variables for iterating over HW entries */ | 268 | /* variables for iterating over HW entries */ |
355 | u32 sid; | 269 | u32 sid; |
@@ -360,24 +274,25 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
360 | struct gk20a_cs_snapshot_fifo *dst; | 274 | struct gk20a_cs_snapshot_fifo *dst; |
361 | struct gk20a_cs_snapshot_fifo_entry *dst_get; | 275 | struct gk20a_cs_snapshot_fifo_entry *dst_get; |
362 | struct gk20a_cs_snapshot_fifo_entry *dst_put; | 276 | struct gk20a_cs_snapshot_fifo_entry *dst_put; |
277 | struct gk20a_cs_snapshot_fifo_entry *dst_nxt; | ||
363 | struct gk20a_cs_snapshot_fifo_entry *dst_head; | 278 | struct gk20a_cs_snapshot_fifo_entry *dst_head; |
364 | struct gk20a_cs_snapshot_fifo_entry *dst_tail; | 279 | struct gk20a_cs_snapshot_fifo_entry *dst_tail; |
365 | 280 | ||
366 | if (!css) | 281 | if (!css) |
367 | return -EINVAL; | 282 | return -EINVAL; |
368 | 283 | ||
369 | if (!css->hw_snapshot) | ||
370 | return -EINVAL; | ||
371 | |||
372 | if (list_empty(&css->clients)) | 284 | if (list_empty(&css->clients)) |
373 | return -EBADF; | 285 | return -EBADF; |
374 | 286 | ||
375 | /* check data available */ | 287 | /* check data available */ |
376 | pending = css_hw_get_pending_snapshots(g); | 288 | err = g->ops.css.check_data_available(ch, &pending, &hw_overflow); |
289 | if (err) | ||
290 | return err; | ||
291 | |||
377 | if (!pending) | 292 | if (!pending) |
378 | return 0; | 293 | return 0; |
379 | 294 | ||
380 | if (css_hw_get_overflow_status(g)) { | 295 | if (hw_overflow) { |
381 | struct list_head *pos; | 296 | struct list_head *pos; |
382 | 297 | ||
383 | list_for_each(pos, &css->clients) { | 298 | list_for_each(pos, &css->clients) { |
@@ -387,11 +302,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
387 | } | 302 | } |
388 | 303 | ||
389 | gk20a_warn(dev_from_gk20a(g), | 304 | gk20a_warn(dev_from_gk20a(g), |
390 | "cyclestats: hardware overflow detected\n"); | 305 | "cyclestats: hardware overflow detected\n"); |
391 | } | 306 | } |
392 | 307 | ||
393 | /* proceed all items in HW buffer */ | 308 | /* process all items in HW buffer */ |
394 | sid = 0; | 309 | sid = 0; |
310 | completed = 0; | ||
395 | cur = NULL; | 311 | cur = NULL; |
396 | dst = NULL; | 312 | dst = NULL; |
397 | dst_put = NULL; | 313 | dst_put = NULL; |
@@ -419,7 +335,11 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
419 | dst_get = CSS_FIFO_ENTRY(dst, dst->get); | 335 | dst_get = CSS_FIFO_ENTRY(dst, dst->get); |
420 | dst_put = CSS_FIFO_ENTRY(dst, dst->put); | 336 | dst_put = CSS_FIFO_ENTRY(dst, dst->put); |
421 | dst_head = CSS_FIFO_ENTRY(dst, dst->start); | 337 | dst_head = CSS_FIFO_ENTRY(dst, dst->start); |
422 | dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1; | 338 | dst_tail = CSS_FIFO_ENTRY(dst, dst->end); |
339 | |||
340 | dst_nxt = dst_put + 1; | ||
341 | if (dst_nxt == dst_tail) | ||
342 | dst_nxt = dst_head; | ||
423 | } else { | 343 | } else { |
424 | /* client not found - skipping this entry */ | 344 | /* client not found - skipping this entry */ |
425 | gk20a_warn(dev_from_gk20a(g), | 345 | gk20a_warn(dev_from_gk20a(g), |
@@ -430,8 +350,7 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
430 | } | 350 | } |
431 | 351 | ||
432 | /* check for software overflows */ | 352 | /* check for software overflows */ |
433 | if (dst_put + 1 == dst_get || | 353 | if (dst_nxt == dst_get) { |
434 | (dst_put == dst_tail && dst_get == dst_head)) { | ||
435 | /* no data copy, no pointer updates */ | 354 | /* no data copy, no pointer updates */ |
436 | dst->sw_overflow_events_occured++; | 355 | dst->sw_overflow_events_occured++; |
437 | gk20a_warn(dev_from_gk20a(g), | 356 | gk20a_warn(dev_from_gk20a(g), |
@@ -439,10 +358,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
439 | src->perfmon_id); | 358 | src->perfmon_id); |
440 | } else { | 359 | } else { |
441 | *dst_put = *src; | 360 | *dst_put = *src; |
442 | if (dst_put == dst_tail) | 361 | completed++; |
443 | dst_put = dst_head; | 362 | |
444 | else | 363 | dst_put = dst_nxt++; |
445 | dst_put++; | 364 | |
365 | if (dst_nxt == dst_tail) | ||
366 | dst_nxt = dst_head; | ||
446 | } | 367 | } |
447 | 368 | ||
448 | next_hw_fifo_entry: | 369 | next_hw_fifo_entry: |
@@ -465,14 +386,17 @@ next_hw_fifo_entry: | |||
465 | (css->hw_end - css->hw_get) * sizeof(*src)); | 386 | (css->hw_end - css->hw_get) * sizeof(*src)); |
466 | } | 387 | } |
467 | gr->cs_data->hw_get = src; | 388 | gr->cs_data->hw_get = src; |
468 | css_hw_set_handled_snapshots(g, sid); | 389 | |
469 | if (pending != sid) { | 390 | if (g->ops.css.set_handled_snapshots) |
391 | g->ops.css.set_handled_snapshots(g, sid); | ||
392 | |||
393 | if (completed != sid) { | ||
470 | /* not all entries proceed correctly. some of problems */ | 394 | /* not all entries proceed correctly. some of problems */ |
471 | /* reported as overflows, some as orphaned perfmons, */ | 395 | /* reported as overflows, some as orphaned perfmons, */ |
472 | /* but it will be better notify with summary about it */ | 396 | /* but it will be better notify with summary about it */ |
473 | gk20a_warn(dev_from_gk20a(g), | 397 | gk20a_warn(dev_from_gk20a(g), |
474 | "cyclestats: done %u from %u entries\n", | 398 | "cyclestats: completed %u from %u entries\n", |
475 | sid, pending); | 399 | completed, pending); |
476 | } | 400 | } |
477 | 401 | ||
478 | return 0; | 402 | return 0; |
@@ -511,7 +435,8 @@ static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, | |||
511 | } | 435 | } |
512 | 436 | ||
513 | 437 | ||
514 | static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, | 438 | static int css_gr_free_client_data(struct gk20a *g, |
439 | struct gk20a_cs_snapshot *data, | ||
515 | struct gk20a_cs_snapshot_client *client) | 440 | struct gk20a_cs_snapshot_client *client) |
516 | { | 441 | { |
517 | int ret = 0; | 442 | int ret = 0; |
@@ -519,8 +444,9 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, | |||
519 | if (client->list.next && client->list.prev) | 444 | if (client->list.next && client->list.prev) |
520 | list_del(&client->list); | 445 | list_del(&client->list); |
521 | 446 | ||
522 | if (client->perfmon_start && client->perfmon_count) { | 447 | if (client->perfmon_start && client->perfmon_count |
523 | if (client->perfmon_count != css_gr_release_perfmon_ids(data, | 448 | && g->ops.css.release_perfmon_ids) { |
449 | if (client->perfmon_count != g->ops.css.release_perfmon_ids(data, | ||
524 | client->perfmon_start, client->perfmon_count)) | 450 | client->perfmon_start, client->perfmon_count)) |
525 | ret = -EINVAL; | 451 | ret = -EINVAL; |
526 | } | 452 | } |
@@ -536,7 +462,8 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, | |||
536 | return ret; | 462 | return ret; |
537 | } | 463 | } |
538 | 464 | ||
539 | static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, | 465 | static int css_gr_create_client_data(struct gk20a *g, |
466 | struct gk20a_cs_snapshot *data, | ||
540 | u32 dmabuf_fd, u32 perfmon_count, | 467 | u32 dmabuf_fd, u32 perfmon_count, |
541 | struct gk20a_cs_snapshot_client **client) | 468 | struct gk20a_cs_snapshot_client **client) |
542 | { | 469 | { |
@@ -581,8 +508,12 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, | |||
581 | cur->snapshot->put = cur->snapshot->start; | 508 | cur->snapshot->put = cur->snapshot->start; |
582 | 509 | ||
583 | cur->perfmon_count = perfmon_count; | 510 | cur->perfmon_count = perfmon_count; |
584 | if (cur->perfmon_count) { | 511 | |
585 | cur->perfmon_start = css_gr_allocate_perfmon_ids(data, | 512 | /* In virtual case, perfmon ID allocation is handled by the server |
513 | * at the time of the attach (allocate_perfmon_ids is NULL in this case) | ||
514 | */ | ||
515 | if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) { | ||
516 | cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data, | ||
586 | cur->perfmon_count); | 517 | cur->perfmon_count); |
587 | if (!cur->perfmon_start) { | 518 | if (!cur->perfmon_start) { |
588 | ret = -ENOENT; | 519 | ret = -ENOENT; |
@@ -598,19 +529,20 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, | |||
598 | failed: | 529 | failed: |
599 | *client = NULL; | 530 | *client = NULL; |
600 | if (cur) | 531 | if (cur) |
601 | css_gr_free_client_data(data, cur); | 532 | css_gr_free_client_data(g, data, cur); |
602 | 533 | ||
603 | return ret; | 534 | return ret; |
604 | } | 535 | } |
605 | 536 | ||
606 | 537 | ||
607 | int gr_gk20a_css_attach(struct gk20a *g, | 538 | int gr_gk20a_css_attach(struct channel_gk20a *ch, |
608 | u32 dmabuf_fd, | 539 | u32 dmabuf_fd, |
609 | u32 perfmon_count, | 540 | u32 perfmon_count, |
610 | u32 *perfmon_start, | 541 | u32 *perfmon_start, |
611 | struct gk20a_cs_snapshot_client **cs_client) | 542 | struct gk20a_cs_snapshot_client **cs_client) |
612 | { | 543 | { |
613 | int ret = 0; | 544 | int ret = 0; |
545 | struct gk20a *g = ch->g; | ||
614 | struct gr_gk20a *gr; | 546 | struct gr_gk20a *gr; |
615 | 547 | ||
616 | /* we must have a placeholder to store pointer to client structure */ | 548 | /* we must have a placeholder to store pointer to client structure */ |
@@ -630,14 +562,14 @@ int gr_gk20a_css_attach(struct gk20a *g, | |||
630 | if (ret) | 562 | if (ret) |
631 | goto failed; | 563 | goto failed; |
632 | 564 | ||
633 | ret = css_gr_create_client_data(gr->cs_data, | 565 | ret = css_gr_create_client_data(g, gr->cs_data, |
634 | dmabuf_fd, | 566 | dmabuf_fd, |
635 | perfmon_count, | 567 | perfmon_count, |
636 | cs_client); | 568 | cs_client); |
637 | if (ret) | 569 | if (ret) |
638 | goto failed; | 570 | goto failed; |
639 | 571 | ||
640 | ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size); | 572 | ret = g->ops.css.enable_snapshot(ch, *cs_client); |
641 | if (ret) | 573 | if (ret) |
642 | goto failed; | 574 | goto failed; |
643 | 575 | ||
@@ -651,7 +583,7 @@ int gr_gk20a_css_attach(struct gk20a *g, | |||
651 | failed: | 583 | failed: |
652 | if (gr->cs_data) { | 584 | if (gr->cs_data) { |
653 | if (*cs_client) { | 585 | if (*cs_client) { |
654 | css_gr_free_client_data(gr->cs_data, *cs_client); | 586 | css_gr_free_client_data(g, gr->cs_data, *cs_client); |
655 | *cs_client = NULL; | 587 | *cs_client = NULL; |
656 | } | 588 | } |
657 | 589 | ||
@@ -666,10 +598,11 @@ failed: | |||
666 | return ret; | 598 | return ret; |
667 | } | 599 | } |
668 | 600 | ||
669 | int gr_gk20a_css_detach(struct gk20a *g, | 601 | int gr_gk20a_css_detach(struct channel_gk20a *ch, |
670 | struct gk20a_cs_snapshot_client *cs_client) | 602 | struct gk20a_cs_snapshot_client *cs_client) |
671 | { | 603 | { |
672 | int ret = 0; | 604 | int ret = 0; |
605 | struct gk20a *g = ch->g; | ||
673 | struct gr_gk20a *gr; | 606 | struct gr_gk20a *gr; |
674 | 607 | ||
675 | if (!cs_client) | 608 | if (!cs_client) |
@@ -680,7 +613,10 @@ int gr_gk20a_css_detach(struct gk20a *g, | |||
680 | if (gr->cs_data) { | 613 | if (gr->cs_data) { |
681 | struct gk20a_cs_snapshot *data = gr->cs_data; | 614 | struct gk20a_cs_snapshot *data = gr->cs_data; |
682 | 615 | ||
683 | ret = css_gr_free_client_data(data, cs_client); | 616 | if (g->ops.css.detach_snapshot) |
617 | g->ops.css.detach_snapshot(ch, cs_client); | ||
618 | |||
619 | ret = css_gr_free_client_data(g, data, cs_client); | ||
684 | if (list_empty(&data->clients)) | 620 | if (list_empty(&data->clients)) |
685 | css_gr_free_shared_data(gr); | 621 | css_gr_free_shared_data(gr); |
686 | } else { | 622 | } else { |
@@ -691,10 +627,11 @@ int gr_gk20a_css_detach(struct gk20a *g, | |||
691 | return ret; | 627 | return ret; |
692 | } | 628 | } |
693 | 629 | ||
694 | int gr_gk20a_css_flush(struct gk20a *g, | 630 | int gr_gk20a_css_flush(struct channel_gk20a *ch, |
695 | struct gk20a_cs_snapshot_client *cs_client) | 631 | struct gk20a_cs_snapshot_client *cs_client) |
696 | { | 632 | { |
697 | int ret = 0; | 633 | int ret = 0; |
634 | struct gk20a *g = ch->g; | ||
698 | struct gr_gk20a *gr; | 635 | struct gr_gk20a *gr; |
699 | 636 | ||
700 | if (!cs_client) | 637 | if (!cs_client) |
@@ -702,7 +639,7 @@ int gr_gk20a_css_flush(struct gk20a *g, | |||
702 | 639 | ||
703 | gr = &g->gr; | 640 | gr = &g->gr; |
704 | mutex_lock(&gr->cs_lock); | 641 | mutex_lock(&gr->cs_lock); |
705 | ret = css_gr_flush_snapshots(gr); | 642 | ret = css_gr_flush_snapshots(ch); |
706 | mutex_unlock(&gr->cs_lock); | 643 | mutex_unlock(&gr->cs_lock); |
707 | 644 | ||
708 | return ret; | 645 | return ret; |
@@ -718,3 +655,31 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) | |||
718 | mutex_unlock(&gr->cs_lock); | 655 | mutex_unlock(&gr->cs_lock); |
719 | mutex_destroy(&gr->cs_lock); | 656 | mutex_destroy(&gr->cs_lock); |
720 | } | 657 | } |
658 | |||
659 | static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, | ||
660 | bool *hw_overflow) | ||
661 | { | ||
662 | struct gk20a *g = ch->g; | ||
663 | struct gr_gk20a *gr = &g->gr; | ||
664 | struct gk20a_cs_snapshot *css = gr->cs_data; | ||
665 | |||
666 | if (!css->hw_snapshot) | ||
667 | return -EINVAL; | ||
668 | |||
669 | *pending = css_hw_get_pending_snapshots(g); | ||
670 | if (!*pending) | ||
671 | return 0; | ||
672 | |||
673 | *hw_overflow = css_hw_get_overflow_status(g); | ||
674 | return 0; | ||
675 | } | ||
676 | |||
677 | void gk20a_init_css_ops(struct gpu_ops *gops) | ||
678 | { | ||
679 | gops->css.enable_snapshot = css_hw_enable_snapshot; | ||
680 | gops->css.disable_snapshot = css_hw_disable_snapshot; | ||
681 | gops->css.check_data_available = css_hw_check_data_available; | ||
682 | gops->css.set_handled_snapshots = css_hw_set_handled_snapshots; | ||
683 | gops->css.allocate_perfmon_ids = css_gr_allocate_perfmon_ids; | ||
684 | gops->css.release_perfmon_ids = css_gr_release_perfmon_ids; | ||
685 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h new file mode 100644 index 00000000..be638abf --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). | ||
3 | * | ||
4 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef CSS_GR_GK20A_H | ||
20 | #define CSS_GR_GK20A_H | ||
21 | |||
22 | /* the minimal size of HW buffer - should be enough to avoid HW overflows */ | ||
23 | #define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) | ||
24 | |||
25 | /* cycle stats fifo header (must match NvSnapshotBufferFifo) */ | ||
26 | struct gk20a_cs_snapshot_fifo { | ||
27 | /* layout description of the buffer */ | ||
28 | u32 start; | ||
29 | u32 end; | ||
30 | |||
31 | /* snafu bits */ | ||
32 | u32 hw_overflow_events_occured; | ||
33 | u32 sw_overflow_events_occured; | ||
34 | |||
35 | /* the kernel copies new entries to put and | ||
36 | * increment the put++. if put == get then | ||
37 | * overflowEventsOccured++ | ||
38 | */ | ||
39 | u32 put; | ||
40 | u32 _reserved10; | ||
41 | u32 _reserved11; | ||
42 | u32 _reserved12; | ||
43 | |||
44 | /* the driver/client reads from get until | ||
45 | * put==get, get++ */ | ||
46 | u32 get; | ||
47 | u32 _reserved20; | ||
48 | u32 _reserved21; | ||
49 | u32 _reserved22; | ||
50 | |||
51 | /* unused */ | ||
52 | u32 _reserved30; | ||
53 | u32 _reserved31; | ||
54 | u32 _reserved32; | ||
55 | u32 _reserved33; | ||
56 | }; | ||
57 | |||
58 | /* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */ | ||
59 | struct gk20a_cs_snapshot_fifo_entry { | ||
60 | /* global 48 timestamp */ | ||
61 | u32 timestamp31_00:32; | ||
62 | u32 timestamp39_32:8; | ||
63 | |||
64 | /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */ | ||
65 | u32 perfmon_id:8; | ||
66 | |||
67 | /* typically samples_counter is wired to #pmtrigger count */ | ||
68 | u32 samples_counter:12; | ||
69 | |||
70 | /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */ | ||
71 | u32 ds:1; | ||
72 | u32 sz:1; | ||
73 | u32 zero0:1; | ||
74 | u32 zero1:1; | ||
75 | |||
76 | /* counter results */ | ||
77 | u32 event_cnt:32; | ||
78 | u32 trigger0_cnt:32; | ||
79 | u32 trigger1_cnt:32; | ||
80 | u32 sample_cnt:32; | ||
81 | |||
82 | /* Local PmTrigger results for Maxwell+ or padding otherwise */ | ||
83 | u16 local_trigger_b_count:16; | ||
84 | u16 book_mark_b:16; | ||
85 | u16 local_trigger_a_count:16; | ||
86 | u16 book_mark_a:16; | ||
87 | }; | ||
88 | |||
89 | /* cycle stats snapshot client data (e.g. associated with channel) */ | ||
90 | struct gk20a_cs_snapshot_client { | ||
91 | struct list_head list; | ||
92 | u32 dmabuf_fd; | ||
93 | struct dma_buf *dma_handler; | ||
94 | struct gk20a_cs_snapshot_fifo *snapshot; | ||
95 | u32 snapshot_size; | ||
96 | u32 perfmon_start; | ||
97 | u32 perfmon_count; | ||
98 | }; | ||
99 | |||
100 | /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ | ||
101 | #define CSS_MAX_PERFMON_IDS 256 | ||
102 | |||
103 | /* local definitions to avoid hardcodes sizes and shifts */ | ||
104 | #define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG) | ||
105 | |||
106 | /* cycle stats snapshot control structure for one HW entry and many clients */ | ||
107 | struct gk20a_cs_snapshot { | ||
108 | unsigned long perfmon_ids[PM_BITMAP_SIZE]; | ||
109 | struct list_head clients; | ||
110 | struct mem_desc hw_memdesc; | ||
111 | /* pointer to allocated cpu_va memory where GPU place data */ | ||
112 | struct gk20a_cs_snapshot_fifo_entry *hw_snapshot; | ||
113 | struct gk20a_cs_snapshot_fifo_entry *hw_end; | ||
114 | struct gk20a_cs_snapshot_fifo_entry *hw_get; | ||
115 | }; | ||
116 | |||
117 | void gk20a_init_css_ops(struct gpu_ops *gops); | ||
118 | |||
119 | #endif /* CSS_GR_GK20A_H */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 669ef1b9..1ca8ff77 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -657,6 +657,24 @@ struct gpu_ops { | |||
657 | struct { | 657 | struct { |
658 | int (*init)(struct gk20a *g); | 658 | int (*init)(struct gk20a *g); |
659 | } bios; | 659 | } bios; |
660 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
661 | struct { | ||
662 | int (*enable_snapshot)(struct channel_gk20a *ch, | ||
663 | struct gk20a_cs_snapshot_client *client); | ||
664 | void (*disable_snapshot)(struct gr_gk20a *gr); | ||
665 | int (*check_data_available)(struct channel_gk20a *ch, | ||
666 | u32 *pending, | ||
667 | bool *hw_overflow); | ||
668 | void (*set_handled_snapshots)(struct gk20a *g, u32 num); | ||
669 | u32 (*allocate_perfmon_ids)(struct gk20a_cs_snapshot *data, | ||
670 | u32 count); | ||
671 | u32 (*release_perfmon_ids)(struct gk20a_cs_snapshot *data, | ||
672 | u32 start, | ||
673 | u32 count); | ||
674 | int (*detach_snapshot)(struct channel_gk20a *ch, | ||
675 | struct gk20a_cs_snapshot_client *client); | ||
676 | } css; | ||
677 | #endif | ||
660 | }; | 678 | }; |
661 | 679 | ||
662 | struct nvgpu_bios_ucode { | 680 | struct nvgpu_bios_ucode { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 2a351bc3..c337a74a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -603,16 +603,16 @@ int gr_gk20a_halt_pipe(struct gk20a *g); | |||
603 | int gr_gk20a_debugfs_init(struct gk20a *g); | 603 | int gr_gk20a_debugfs_init(struct gk20a *g); |
604 | 604 | ||
605 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 605 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
606 | int gr_gk20a_css_attach(struct gk20a *g, /* in - main hw structure */ | 606 | int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */ |
607 | u32 dmabuf_fd, /* in - dma mapped memory */ | 607 | u32 dmabuf_fd, /* in - dma mapped memory */ |
608 | u32 perfmon_id_count, /* in - number of perfmons*/ | 608 | u32 perfmon_id_count, /* in - number of perfmons*/ |
609 | u32 *perfmon_id_start, /* out- index of first pm */ | 609 | u32 *perfmon_id_start, /* out- index of first pm */ |
610 | /* out - pointer to client data used in later */ | 610 | /* out - pointer to client data used in later */ |
611 | struct gk20a_cs_snapshot_client **css_client); | 611 | struct gk20a_cs_snapshot_client **css_client); |
612 | 612 | ||
613 | int gr_gk20a_css_detach(struct gk20a *g, | 613 | int gr_gk20a_css_detach(struct channel_gk20a *ch, |
614 | struct gk20a_cs_snapshot_client *css_client); | 614 | struct gk20a_cs_snapshot_client *css_client); |
615 | int gr_gk20a_css_flush(struct gk20a *g, | 615 | int gr_gk20a_css_flush(struct channel_gk20a *ch, |
616 | struct gk20a_cs_snapshot_client *css_client); | 616 | struct gk20a_cs_snapshot_client *css_client); |
617 | 617 | ||
618 | void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g); | 618 | void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 4da7ffad..550dffa6 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include "hw_proj_gk20a.h" | 32 | #include "hw_proj_gk20a.h" |
33 | #include "tsg_gk20a.h" | 33 | #include "tsg_gk20a.h" |
34 | #include "dbg_gpu_gk20a.h" | 34 | #include "dbg_gpu_gk20a.h" |
35 | #include "css_gr_gk20a.h" | ||
35 | 36 | ||
36 | static struct gpu_ops gk20a_ops = { | 37 | static struct gpu_ops gk20a_ops = { |
37 | .clock_gating = { | 38 | .clock_gating = { |
@@ -157,6 +158,9 @@ int gk20a_init_hal(struct gk20a *g) | |||
157 | gk20a_init_dbg_session_ops(gops); | 158 | gk20a_init_dbg_session_ops(gops); |
158 | gk20a_init_therm_ops(gops); | 159 | gk20a_init_therm_ops(gops); |
159 | gk20a_init_tsg_ops(gops); | 160 | gk20a_init_tsg_ops(gops); |
161 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
162 | gk20a_init_css_ops(gops); | ||
163 | #endif | ||
160 | gops->name = "gk20a"; | 164 | gops->name = "gk20a"; |
161 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; | 165 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; |
162 | gops->get_litter_value = gk20a_get_litter_value; | 166 | gops->get_litter_value = gk20a_get_litter_value; |