diff options
author | Peter Daifuku <pdaifuku@nvidia.com> | 2016-08-31 20:04:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-08 19:04:09 -0400 |
commit | 9aa7de15c2a644e9c7e9c157e49087e66d4ac3d0 (patch) | |
tree | e5080886f09aa75c6a3cc83e5b27f8f7553678a4 | |
parent | 70cad5fbb593602a49f91e57c04d1da0334b3a49 (diff) |
gpu: nvgpu: vgpu: cyclestat snapshot support
Add support for cyclestats snapshots in the virtual case
Bug 1700143
JIRA EVLR-278
Change-Id: I376a8804d57324f43eb16452d857a3b7bb0ecc90
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1211547
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/Makefile.nvgpu | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 233 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | 119 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm206/hal_gm206.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/css_vgpu.c | 221 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/vgpu.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/vgpu.h | 8 | ||||
-rw-r--r-- | include/linux/tegra_vgpu.h | 11 |
14 files changed, 505 insertions, 143 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 5ca2f56f..b8e38919 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu | |||
@@ -105,6 +105,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ | |||
105 | vgpu/dbg_vgpu.o \ | 105 | vgpu/dbg_vgpu.o \ |
106 | vgpu/fecs_trace_vgpu.o \ | 106 | vgpu/fecs_trace_vgpu.o \ |
107 | vgpu/tsg_vgpu.o \ | 107 | vgpu/tsg_vgpu.o \ |
108 | vgpu/css_vgpu.o \ | ||
108 | vgpu/gk20a/vgpu_hal_gk20a.o \ | 109 | vgpu/gk20a/vgpu_hal_gk20a.o \ |
109 | vgpu/gk20a/vgpu_gr_gk20a.o \ | 110 | vgpu/gk20a/vgpu_gr_gk20a.o \ |
110 | vgpu/gm20b/vgpu_hal_gm20b.o \ | 111 | vgpu/gm20b/vgpu_hal_gm20b.o \ |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 49711af9..d23a8026 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -603,7 +603,7 @@ static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) | |||
603 | 603 | ||
604 | mutex_lock(&ch->cs_client_mutex); | 604 | mutex_lock(&ch->cs_client_mutex); |
605 | if (ch->cs_client) | 605 | if (ch->cs_client) |
606 | ret = gr_gk20a_css_flush(ch->g, ch->cs_client); | 606 | ret = gr_gk20a_css_flush(ch, ch->cs_client); |
607 | else | 607 | else |
608 | ret = -EBADF; | 608 | ret = -EBADF; |
609 | mutex_unlock(&ch->cs_client_mutex); | 609 | mutex_unlock(&ch->cs_client_mutex); |
@@ -622,7 +622,7 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, | |||
622 | if (ch->cs_client) { | 622 | if (ch->cs_client) { |
623 | ret = -EEXIST; | 623 | ret = -EEXIST; |
624 | } else { | 624 | } else { |
625 | ret = gr_gk20a_css_attach(ch->g, | 625 | ret = gr_gk20a_css_attach(ch, |
626 | dmabuf_fd, | 626 | dmabuf_fd, |
627 | perfmon_id_count, | 627 | perfmon_id_count, |
628 | perfmon_id_start, | 628 | perfmon_id_start, |
@@ -639,7 +639,7 @@ static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch) | |||
639 | 639 | ||
640 | mutex_lock(&ch->cs_client_mutex); | 640 | mutex_lock(&ch->cs_client_mutex); |
641 | if (ch->cs_client) { | 641 | if (ch->cs_client) { |
642 | ret = gr_gk20a_css_detach(ch->g, ch->cs_client); | 642 | ret = gr_gk20a_css_detach(ch, ch->cs_client); |
643 | ch->cs_client = NULL; | 643 | ch->cs_client = NULL; |
644 | } else { | 644 | } else { |
645 | ret = 0; | 645 | ret = 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 62f60761..71614d6e 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | |||
@@ -25,93 +25,13 @@ | |||
25 | #include "gk20a.h" | 25 | #include "gk20a.h" |
26 | #include "hw_perf_gk20a.h" | 26 | #include "hw_perf_gk20a.h" |
27 | #include "hw_mc_gk20a.h" | 27 | #include "hw_mc_gk20a.h" |
28 | 28 | #include "css_gr_gk20a.h" | |
29 | |||
30 | |||
31 | /* cycle stats fifo header (must match NvSnapshotBufferFifo) */ | ||
32 | struct gk20a_cs_snapshot_fifo { | ||
33 | /* layout description of the buffer */ | ||
34 | u32 start; | ||
35 | u32 end; | ||
36 | |||
37 | /* snafu bits */ | ||
38 | u32 hw_overflow_events_occured; | ||
39 | u32 sw_overflow_events_occured; | ||
40 | |||
41 | /* the kernel copies new entries to put and | ||
42 | * increment the put++. if put == get then | ||
43 | * overflowEventsOccured++ | ||
44 | */ | ||
45 | u32 put; | ||
46 | u32 _reserved10; | ||
47 | u32 _reserved11; | ||
48 | u32 _reserved12; | ||
49 | |||
50 | /* the driver/client reads from get until | ||
51 | * put==get, get++ */ | ||
52 | u32 get; | ||
53 | u32 _reserved20; | ||
54 | u32 _reserved21; | ||
55 | u32 _reserved22; | ||
56 | |||
57 | /* unused */ | ||
58 | u32 _reserved30; | ||
59 | u32 _reserved31; | ||
60 | u32 _reserved32; | ||
61 | u32 _reserved33; | ||
62 | }; | ||
63 | |||
64 | /* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */ | ||
65 | struct gk20a_cs_snapshot_fifo_entry { | ||
66 | /* global 48 timestamp */ | ||
67 | u32 timestamp31_00:32; | ||
68 | u32 timestamp39_32:8; | ||
69 | |||
70 | /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */ | ||
71 | u32 perfmon_id:8; | ||
72 | |||
73 | /* typically samples_counter is wired to #pmtrigger count */ | ||
74 | u32 samples_counter:12; | ||
75 | |||
76 | /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */ | ||
77 | u32 ds:1; | ||
78 | u32 sz:1; | ||
79 | u32 zero0:1; | ||
80 | u32 zero1:1; | ||
81 | |||
82 | /* counter results */ | ||
83 | u32 event_cnt:32; | ||
84 | u32 trigger0_cnt:32; | ||
85 | u32 trigger1_cnt:32; | ||
86 | u32 sample_cnt:32; | ||
87 | |||
88 | /* Local PmTrigger results for Maxwell+ or padding otherwise */ | ||
89 | u16 local_trigger_b_count:16; | ||
90 | u16 book_mark_b:16; | ||
91 | u16 local_trigger_a_count:16; | ||
92 | u16 book_mark_a:16; | ||
93 | }; | ||
94 | |||
95 | |||
96 | /* cycle stats snapshot client data (e.g. associated with channel) */ | ||
97 | struct gk20a_cs_snapshot_client { | ||
98 | struct list_head list; | ||
99 | u32 dmabuf_fd; | ||
100 | struct dma_buf *dma_handler; | ||
101 | struct gk20a_cs_snapshot_fifo *snapshot; | ||
102 | u32 snapshot_size; | ||
103 | u32 perfmon_start; | ||
104 | u32 perfmon_count; | ||
105 | }; | ||
106 | 29 | ||
107 | /* check client for pointed perfmon ownership */ | 30 | /* check client for pointed perfmon ownership */ |
108 | #define CONTAINS_PERFMON(cl, pm) \ | 31 | #define CONTAINS_PERFMON(cl, pm) \ |
109 | ((cl)->perfmon_start <= (pm) && \ | 32 | ((cl)->perfmon_start <= (pm) && \ |
110 | ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) | 33 | ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) |
111 | 34 | ||
112 | /* the minimal size of HW buffer - should be enough to avoid HW overflows */ | ||
113 | #define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) | ||
114 | |||
115 | /* the minimal size of client buffer */ | 35 | /* the minimal size of client buffer */ |
116 | #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ | 36 | #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ |
117 | (sizeof(struct gk20a_cs_snapshot_fifo) + \ | 37 | (sizeof(struct gk20a_cs_snapshot_fifo) + \ |
@@ -131,20 +51,6 @@ struct gk20a_cs_snapshot_client { | |||
131 | /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ | 51 | /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ |
132 | #define CSS_MAX_PERFMON_IDS 256 | 52 | #define CSS_MAX_PERFMON_IDS 256 |
133 | 53 | ||
134 | /* local definitions to avoid hardcodes sizes and shifts */ | ||
135 | #define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG) | ||
136 | |||
137 | /* cycle stats snapshot control structure for one HW entry and many clients */ | ||
138 | struct gk20a_cs_snapshot { | ||
139 | unsigned long perfmon_ids[PM_BITMAP_SIZE]; | ||
140 | struct list_head clients; | ||
141 | struct mem_desc hw_memdesc; | ||
142 | /* pointer to allocated cpu_va memory where GPU place data */ | ||
143 | struct gk20a_cs_snapshot_fifo_entry *hw_snapshot; | ||
144 | struct gk20a_cs_snapshot_fifo_entry *hw_end; | ||
145 | struct gk20a_cs_snapshot_fifo_entry *hw_get; | ||
146 | }; | ||
147 | |||
148 | /* reports whether the hw queue overflowed */ | 54 | /* reports whether the hw queue overflowed */ |
149 | static inline bool css_hw_get_overflow_status(struct gk20a *g) | 55 | static inline bool css_hw_get_overflow_status(struct gk20a *g) |
150 | { | 56 | { |
@@ -215,10 +121,13 @@ static int css_gr_create_shared_data(struct gr_gk20a *gr) | |||
215 | return 0; | 121 | return 0; |
216 | } | 122 | } |
217 | 123 | ||
218 | static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size) | 124 | static int css_hw_enable_snapshot(struct channel_gk20a *ch, |
125 | struct gk20a_cs_snapshot_client *cs_client) | ||
219 | { | 126 | { |
220 | struct gk20a *g = gr->g; | 127 | struct gk20a *g = ch->g; |
128 | struct gr_gk20a *gr = &g->gr; | ||
221 | struct gk20a_cs_snapshot *data = gr->cs_data; | 129 | struct gk20a_cs_snapshot *data = gr->cs_data; |
130 | u32 snapshot_size = cs_client->snapshot_size; | ||
222 | int ret; | 131 | int ret; |
223 | 132 | ||
224 | u32 virt_addr_lo; | 133 | u32 virt_addr_lo; |
@@ -317,9 +226,11 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr) | |||
317 | 226 | ||
318 | static void css_gr_free_shared_data(struct gr_gk20a *gr) | 227 | static void css_gr_free_shared_data(struct gr_gk20a *gr) |
319 | { | 228 | { |
229 | struct gk20a *g = gr->g; | ||
230 | |||
320 | if (gr->cs_data) { | 231 | if (gr->cs_data) { |
321 | /* the clients list is expected to be empty */ | 232 | /* the clients list is expected to be empty */ |
322 | css_hw_disable_snapshot(gr); | 233 | g->ops.css.disable_snapshot(gr); |
323 | 234 | ||
324 | /* release the objects */ | 235 | /* release the objects */ |
325 | kfree(gr->cs_data); | 236 | kfree(gr->cs_data); |
@@ -344,12 +255,15 @@ css_gr_search_client(struct list_head *clients, u32 perfmon) | |||
344 | return NULL; | 255 | return NULL; |
345 | } | 256 | } |
346 | 257 | ||
347 | static int css_gr_flush_snapshots(struct gr_gk20a *gr) | 258 | static int css_gr_flush_snapshots(struct channel_gk20a *ch) |
348 | { | 259 | { |
349 | struct gk20a *g = gr->g; | 260 | struct gk20a *g = ch->g; |
261 | struct gr_gk20a *gr = &g->gr; | ||
350 | struct gk20a_cs_snapshot *css = gr->cs_data; | 262 | struct gk20a_cs_snapshot *css = gr->cs_data; |
351 | struct gk20a_cs_snapshot_client *cur; | 263 | struct gk20a_cs_snapshot_client *cur; |
352 | u32 pending; | 264 | u32 pending, completed; |
265 | bool hw_overflow; | ||
266 | int err; | ||
353 | 267 | ||
354 | /* variables for iterating over HW entries */ | 268 | /* variables for iterating over HW entries */ |
355 | u32 sid; | 269 | u32 sid; |
@@ -360,24 +274,25 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
360 | struct gk20a_cs_snapshot_fifo *dst; | 274 | struct gk20a_cs_snapshot_fifo *dst; |
361 | struct gk20a_cs_snapshot_fifo_entry *dst_get; | 275 | struct gk20a_cs_snapshot_fifo_entry *dst_get; |
362 | struct gk20a_cs_snapshot_fifo_entry *dst_put; | 276 | struct gk20a_cs_snapshot_fifo_entry *dst_put; |
277 | struct gk20a_cs_snapshot_fifo_entry *dst_nxt; | ||
363 | struct gk20a_cs_snapshot_fifo_entry *dst_head; | 278 | struct gk20a_cs_snapshot_fifo_entry *dst_head; |
364 | struct gk20a_cs_snapshot_fifo_entry *dst_tail; | 279 | struct gk20a_cs_snapshot_fifo_entry *dst_tail; |
365 | 280 | ||
366 | if (!css) | 281 | if (!css) |
367 | return -EINVAL; | 282 | return -EINVAL; |
368 | 283 | ||
369 | if (!css->hw_snapshot) | ||
370 | return -EINVAL; | ||
371 | |||
372 | if (list_empty(&css->clients)) | 284 | if (list_empty(&css->clients)) |
373 | return -EBADF; | 285 | return -EBADF; |
374 | 286 | ||
375 | /* check data available */ | 287 | /* check data available */ |
376 | pending = css_hw_get_pending_snapshots(g); | 288 | err = g->ops.css.check_data_available(ch, &pending, &hw_overflow); |
289 | if (err) | ||
290 | return err; | ||
291 | |||
377 | if (!pending) | 292 | if (!pending) |
378 | return 0; | 293 | return 0; |
379 | 294 | ||
380 | if (css_hw_get_overflow_status(g)) { | 295 | if (hw_overflow) { |
381 | struct list_head *pos; | 296 | struct list_head *pos; |
382 | 297 | ||
383 | list_for_each(pos, &css->clients) { | 298 | list_for_each(pos, &css->clients) { |
@@ -387,11 +302,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
387 | } | 302 | } |
388 | 303 | ||
389 | gk20a_warn(dev_from_gk20a(g), | 304 | gk20a_warn(dev_from_gk20a(g), |
390 | "cyclestats: hardware overflow detected\n"); | 305 | "cyclestats: hardware overflow detected\n"); |
391 | } | 306 | } |
392 | 307 | ||
393 | /* proceed all items in HW buffer */ | 308 | /* process all items in HW buffer */ |
394 | sid = 0; | 309 | sid = 0; |
310 | completed = 0; | ||
395 | cur = NULL; | 311 | cur = NULL; |
396 | dst = NULL; | 312 | dst = NULL; |
397 | dst_put = NULL; | 313 | dst_put = NULL; |
@@ -419,7 +335,11 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
419 | dst_get = CSS_FIFO_ENTRY(dst, dst->get); | 335 | dst_get = CSS_FIFO_ENTRY(dst, dst->get); |
420 | dst_put = CSS_FIFO_ENTRY(dst, dst->put); | 336 | dst_put = CSS_FIFO_ENTRY(dst, dst->put); |
421 | dst_head = CSS_FIFO_ENTRY(dst, dst->start); | 337 | dst_head = CSS_FIFO_ENTRY(dst, dst->start); |
422 | dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1; | 338 | dst_tail = CSS_FIFO_ENTRY(dst, dst->end); |
339 | |||
340 | dst_nxt = dst_put + 1; | ||
341 | if (dst_nxt == dst_tail) | ||
342 | dst_nxt = dst_head; | ||
423 | } else { | 343 | } else { |
424 | /* client not found - skipping this entry */ | 344 | /* client not found - skipping this entry */ |
425 | gk20a_warn(dev_from_gk20a(g), | 345 | gk20a_warn(dev_from_gk20a(g), |
@@ -430,8 +350,7 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
430 | } | 350 | } |
431 | 351 | ||
432 | /* check for software overflows */ | 352 | /* check for software overflows */ |
433 | if (dst_put + 1 == dst_get || | 353 | if (dst_nxt == dst_get) { |
434 | (dst_put == dst_tail && dst_get == dst_head)) { | ||
435 | /* no data copy, no pointer updates */ | 354 | /* no data copy, no pointer updates */ |
436 | dst->sw_overflow_events_occured++; | 355 | dst->sw_overflow_events_occured++; |
437 | gk20a_warn(dev_from_gk20a(g), | 356 | gk20a_warn(dev_from_gk20a(g), |
@@ -439,10 +358,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) | |||
439 | src->perfmon_id); | 358 | src->perfmon_id); |
440 | } else { | 359 | } else { |
441 | *dst_put = *src; | 360 | *dst_put = *src; |
442 | if (dst_put == dst_tail) | 361 | completed++; |
443 | dst_put = dst_head; | 362 | |
444 | else | 363 | dst_put = dst_nxt++; |
445 | dst_put++; | 364 | |
365 | if (dst_nxt == dst_tail) | ||
366 | dst_nxt = dst_head; | ||
446 | } | 367 | } |
447 | 368 | ||
448 | next_hw_fifo_entry: | 369 | next_hw_fifo_entry: |
@@ -465,14 +386,17 @@ next_hw_fifo_entry: | |||
465 | (css->hw_end - css->hw_get) * sizeof(*src)); | 386 | (css->hw_end - css->hw_get) * sizeof(*src)); |
466 | } | 387 | } |
467 | gr->cs_data->hw_get = src; | 388 | gr->cs_data->hw_get = src; |
468 | css_hw_set_handled_snapshots(g, sid); | 389 | |
469 | if (pending != sid) { | 390 | if (g->ops.css.set_handled_snapshots) |
391 | g->ops.css.set_handled_snapshots(g, sid); | ||
392 | |||
393 | if (completed != sid) { | ||
470 | /* not all entries proceed correctly. some of problems */ | 394 | /* not all entries proceed correctly. some of problems */ |
471 | /* reported as overflows, some as orphaned perfmons, */ | 395 | /* reported as overflows, some as orphaned perfmons, */ |
472 | /* but it will be better notify with summary about it */ | 396 | /* but it will be better notify with summary about it */ |
473 | gk20a_warn(dev_from_gk20a(g), | 397 | gk20a_warn(dev_from_gk20a(g), |
474 | "cyclestats: done %u from %u entries\n", | 398 | "cyclestats: completed %u from %u entries\n", |
475 | sid, pending); | 399 | completed, pending); |
476 | } | 400 | } |
477 | 401 | ||
478 | return 0; | 402 | return 0; |
@@ -511,7 +435,8 @@ static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, | |||
511 | } | 435 | } |
512 | 436 | ||
513 | 437 | ||
514 | static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, | 438 | static int css_gr_free_client_data(struct gk20a *g, |
439 | struct gk20a_cs_snapshot *data, | ||
515 | struct gk20a_cs_snapshot_client *client) | 440 | struct gk20a_cs_snapshot_client *client) |
516 | { | 441 | { |
517 | int ret = 0; | 442 | int ret = 0; |
@@ -519,8 +444,9 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, | |||
519 | if (client->list.next && client->list.prev) | 444 | if (client->list.next && client->list.prev) |
520 | list_del(&client->list); | 445 | list_del(&client->list); |
521 | 446 | ||
522 | if (client->perfmon_start && client->perfmon_count) { | 447 | if (client->perfmon_start && client->perfmon_count |
523 | if (client->perfmon_count != css_gr_release_perfmon_ids(data, | 448 | && g->ops.css.release_perfmon_ids) { |
449 | if (client->perfmon_count != g->ops.css.release_perfmon_ids(data, | ||
524 | client->perfmon_start, client->perfmon_count)) | 450 | client->perfmon_start, client->perfmon_count)) |
525 | ret = -EINVAL; | 451 | ret = -EINVAL; |
526 | } | 452 | } |
@@ -536,7 +462,8 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, | |||
536 | return ret; | 462 | return ret; |
537 | } | 463 | } |
538 | 464 | ||
539 | static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, | 465 | static int css_gr_create_client_data(struct gk20a *g, |
466 | struct gk20a_cs_snapshot *data, | ||
540 | u32 dmabuf_fd, u32 perfmon_count, | 467 | u32 dmabuf_fd, u32 perfmon_count, |
541 | struct gk20a_cs_snapshot_client **client) | 468 | struct gk20a_cs_snapshot_client **client) |
542 | { | 469 | { |
@@ -581,8 +508,12 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, | |||
581 | cur->snapshot->put = cur->snapshot->start; | 508 | cur->snapshot->put = cur->snapshot->start; |
582 | 509 | ||
583 | cur->perfmon_count = perfmon_count; | 510 | cur->perfmon_count = perfmon_count; |
584 | if (cur->perfmon_count) { | 511 | |
585 | cur->perfmon_start = css_gr_allocate_perfmon_ids(data, | 512 | /* In virtual case, perfmon ID allocation is handled by the server |
513 | * at the time of the attach (allocate_perfmon_ids is NULL in this case) | ||
514 | */ | ||
515 | if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) { | ||
516 | cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data, | ||
586 | cur->perfmon_count); | 517 | cur->perfmon_count); |
587 | if (!cur->perfmon_start) { | 518 | if (!cur->perfmon_start) { |
588 | ret = -ENOENT; | 519 | ret = -ENOENT; |
@@ -598,19 +529,20 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, | |||
598 | failed: | 529 | failed: |
599 | *client = NULL; | 530 | *client = NULL; |
600 | if (cur) | 531 | if (cur) |
601 | css_gr_free_client_data(data, cur); | 532 | css_gr_free_client_data(g, data, cur); |
602 | 533 | ||
603 | return ret; | 534 | return ret; |
604 | } | 535 | } |
605 | 536 | ||
606 | 537 | ||
607 | int gr_gk20a_css_attach(struct gk20a *g, | 538 | int gr_gk20a_css_attach(struct channel_gk20a *ch, |
608 | u32 dmabuf_fd, | 539 | u32 dmabuf_fd, |
609 | u32 perfmon_count, | 540 | u32 perfmon_count, |
610 | u32 *perfmon_start, | 541 | u32 *perfmon_start, |
611 | struct gk20a_cs_snapshot_client **cs_client) | 542 | struct gk20a_cs_snapshot_client **cs_client) |
612 | { | 543 | { |
613 | int ret = 0; | 544 | int ret = 0; |
545 | struct gk20a *g = ch->g; | ||
614 | struct gr_gk20a *gr; | 546 | struct gr_gk20a *gr; |
615 | 547 | ||
616 | /* we must have a placeholder to store pointer to client structure */ | 548 | /* we must have a placeholder to store pointer to client structure */ |
@@ -630,14 +562,14 @@ int gr_gk20a_css_attach(struct gk20a *g, | |||
630 | if (ret) | 562 | if (ret) |
631 | goto failed; | 563 | goto failed; |
632 | 564 | ||
633 | ret = css_gr_create_client_data(gr->cs_data, | 565 | ret = css_gr_create_client_data(g, gr->cs_data, |
634 | dmabuf_fd, | 566 | dmabuf_fd, |
635 | perfmon_count, | 567 | perfmon_count, |
636 | cs_client); | 568 | cs_client); |
637 | if (ret) | 569 | if (ret) |
638 | goto failed; | 570 | goto failed; |
639 | 571 | ||
640 | ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size); | 572 | ret = g->ops.css.enable_snapshot(ch, *cs_client); |
641 | if (ret) | 573 | if (ret) |
642 | goto failed; | 574 | goto failed; |
643 | 575 | ||
@@ -651,7 +583,7 @@ int gr_gk20a_css_attach(struct gk20a *g, | |||
651 | failed: | 583 | failed: |
652 | if (gr->cs_data) { | 584 | if (gr->cs_data) { |
653 | if (*cs_client) { | 585 | if (*cs_client) { |
654 | css_gr_free_client_data(gr->cs_data, *cs_client); | 586 | css_gr_free_client_data(g, gr->cs_data, *cs_client); |
655 | *cs_client = NULL; | 587 | *cs_client = NULL; |
656 | } | 588 | } |
657 | 589 | ||
@@ -666,10 +598,11 @@ failed: | |||
666 | return ret; | 598 | return ret; |
667 | } | 599 | } |
668 | 600 | ||
669 | int gr_gk20a_css_detach(struct gk20a *g, | 601 | int gr_gk20a_css_detach(struct channel_gk20a *ch, |
670 | struct gk20a_cs_snapshot_client *cs_client) | 602 | struct gk20a_cs_snapshot_client *cs_client) |
671 | { | 603 | { |
672 | int ret = 0; | 604 | int ret = 0; |
605 | struct gk20a *g = ch->g; | ||
673 | struct gr_gk20a *gr; | 606 | struct gr_gk20a *gr; |
674 | 607 | ||
675 | if (!cs_client) | 608 | if (!cs_client) |
@@ -680,7 +613,10 @@ int gr_gk20a_css_detach(struct gk20a *g, | |||
680 | if (gr->cs_data) { | 613 | if (gr->cs_data) { |
681 | struct gk20a_cs_snapshot *data = gr->cs_data; | 614 | struct gk20a_cs_snapshot *data = gr->cs_data; |
682 | 615 | ||
683 | ret = css_gr_free_client_data(data, cs_client); | 616 | if (g->ops.css.detach_snapshot) |
617 | g->ops.css.detach_snapshot(ch, cs_client); | ||
618 | |||
619 | ret = css_gr_free_client_data(g, data, cs_client); | ||
684 | if (list_empty(&data->clients)) | 620 | if (list_empty(&data->clients)) |
685 | css_gr_free_shared_data(gr); | 621 | css_gr_free_shared_data(gr); |
686 | } else { | 622 | } else { |
@@ -691,10 +627,11 @@ int gr_gk20a_css_detach(struct gk20a *g, | |||
691 | return ret; | 627 | return ret; |
692 | } | 628 | } |
693 | 629 | ||
694 | int gr_gk20a_css_flush(struct gk20a *g, | 630 | int gr_gk20a_css_flush(struct channel_gk20a *ch, |
695 | struct gk20a_cs_snapshot_client *cs_client) | 631 | struct gk20a_cs_snapshot_client *cs_client) |
696 | { | 632 | { |
697 | int ret = 0; | 633 | int ret = 0; |
634 | struct gk20a *g = ch->g; | ||
698 | struct gr_gk20a *gr; | 635 | struct gr_gk20a *gr; |
699 | 636 | ||
700 | if (!cs_client) | 637 | if (!cs_client) |
@@ -702,7 +639,7 @@ int gr_gk20a_css_flush(struct gk20a *g, | |||
702 | 639 | ||
703 | gr = &g->gr; | 640 | gr = &g->gr; |
704 | mutex_lock(&gr->cs_lock); | 641 | mutex_lock(&gr->cs_lock); |
705 | ret = css_gr_flush_snapshots(gr); | 642 | ret = css_gr_flush_snapshots(ch); |
706 | mutex_unlock(&gr->cs_lock); | 643 | mutex_unlock(&gr->cs_lock); |
707 | 644 | ||
708 | return ret; | 645 | return ret; |
@@ -718,3 +655,31 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) | |||
718 | mutex_unlock(&gr->cs_lock); | 655 | mutex_unlock(&gr->cs_lock); |
719 | mutex_destroy(&gr->cs_lock); | 656 | mutex_destroy(&gr->cs_lock); |
720 | } | 657 | } |
658 | |||
659 | static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, | ||
660 | bool *hw_overflow) | ||
661 | { | ||
662 | struct gk20a *g = ch->g; | ||
663 | struct gr_gk20a *gr = &g->gr; | ||
664 | struct gk20a_cs_snapshot *css = gr->cs_data; | ||
665 | |||
666 | if (!css->hw_snapshot) | ||
667 | return -EINVAL; | ||
668 | |||
669 | *pending = css_hw_get_pending_snapshots(g); | ||
670 | if (!*pending) | ||
671 | return 0; | ||
672 | |||
673 | *hw_overflow = css_hw_get_overflow_status(g); | ||
674 | return 0; | ||
675 | } | ||
676 | |||
677 | void gk20a_init_css_ops(struct gpu_ops *gops) | ||
678 | { | ||
679 | gops->css.enable_snapshot = css_hw_enable_snapshot; | ||
680 | gops->css.disable_snapshot = css_hw_disable_snapshot; | ||
681 | gops->css.check_data_available = css_hw_check_data_available; | ||
682 | gops->css.set_handled_snapshots = css_hw_set_handled_snapshots; | ||
683 | gops->css.allocate_perfmon_ids = css_gr_allocate_perfmon_ids; | ||
684 | gops->css.release_perfmon_ids = css_gr_release_perfmon_ids; | ||
685 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h new file mode 100644 index 00000000..be638abf --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). | ||
3 | * | ||
4 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef CSS_GR_GK20A_H | ||
20 | #define CSS_GR_GK20A_H | ||
21 | |||
22 | /* the minimal size of HW buffer - should be enough to avoid HW overflows */ | ||
23 | #define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) | ||
24 | |||
25 | /* cycle stats fifo header (must match NvSnapshotBufferFifo) */ | ||
26 | struct gk20a_cs_snapshot_fifo { | ||
27 | /* layout description of the buffer */ | ||
28 | u32 start; | ||
29 | u32 end; | ||
30 | |||
31 | /* snafu bits */ | ||
32 | u32 hw_overflow_events_occured; | ||
33 | u32 sw_overflow_events_occured; | ||
34 | |||
35 | /* the kernel copies new entries to put and | ||
36 | * increment the put++. if put == get then | ||
37 | * overflowEventsOccured++ | ||
38 | */ | ||
39 | u32 put; | ||
40 | u32 _reserved10; | ||
41 | u32 _reserved11; | ||
42 | u32 _reserved12; | ||
43 | |||
44 | /* the driver/client reads from get until | ||
45 | * put==get, get++ */ | ||
46 | u32 get; | ||
47 | u32 _reserved20; | ||
48 | u32 _reserved21; | ||
49 | u32 _reserved22; | ||
50 | |||
51 | /* unused */ | ||
52 | u32 _reserved30; | ||
53 | u32 _reserved31; | ||
54 | u32 _reserved32; | ||
55 | u32 _reserved33; | ||
56 | }; | ||
57 | |||
58 | /* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */ | ||
59 | struct gk20a_cs_snapshot_fifo_entry { | ||
60 | /* global 48 timestamp */ | ||
61 | u32 timestamp31_00:32; | ||
62 | u32 timestamp39_32:8; | ||
63 | |||
64 | /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */ | ||
65 | u32 perfmon_id:8; | ||
66 | |||
67 | /* typically samples_counter is wired to #pmtrigger count */ | ||
68 | u32 samples_counter:12; | ||
69 | |||
70 | /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */ | ||
71 | u32 ds:1; | ||
72 | u32 sz:1; | ||
73 | u32 zero0:1; | ||
74 | u32 zero1:1; | ||
75 | |||
76 | /* counter results */ | ||
77 | u32 event_cnt:32; | ||
78 | u32 trigger0_cnt:32; | ||
79 | u32 trigger1_cnt:32; | ||
80 | u32 sample_cnt:32; | ||
81 | |||
82 | /* Local PmTrigger results for Maxwell+ or padding otherwise */ | ||
83 | u16 local_trigger_b_count:16; | ||
84 | u16 book_mark_b:16; | ||
85 | u16 local_trigger_a_count:16; | ||
86 | u16 book_mark_a:16; | ||
87 | }; | ||
88 | |||
89 | /* cycle stats snapshot client data (e.g. associated with channel) */ | ||
90 | struct gk20a_cs_snapshot_client { | ||
91 | struct list_head list; | ||
92 | u32 dmabuf_fd; | ||
93 | struct dma_buf *dma_handler; | ||
94 | struct gk20a_cs_snapshot_fifo *snapshot; | ||
95 | u32 snapshot_size; | ||
96 | u32 perfmon_start; | ||
97 | u32 perfmon_count; | ||
98 | }; | ||
99 | |||
100 | /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ | ||
101 | #define CSS_MAX_PERFMON_IDS 256 | ||
102 | |||
103 | /* local definitions to avoid hardcodes sizes and shifts */ | ||
104 | #define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG) | ||
105 | |||
106 | /* cycle stats snapshot control structure for one HW entry and many clients */ | ||
107 | struct gk20a_cs_snapshot { | ||
108 | unsigned long perfmon_ids[PM_BITMAP_SIZE]; | ||
109 | struct list_head clients; | ||
110 | struct mem_desc hw_memdesc; | ||
111 | /* pointer to allocated cpu_va memory where GPU place data */ | ||
112 | struct gk20a_cs_snapshot_fifo_entry *hw_snapshot; | ||
113 | struct gk20a_cs_snapshot_fifo_entry *hw_end; | ||
114 | struct gk20a_cs_snapshot_fifo_entry *hw_get; | ||
115 | }; | ||
116 | |||
117 | void gk20a_init_css_ops(struct gpu_ops *gops); | ||
118 | |||
119 | #endif /* CSS_GR_GK20A_H */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 669ef1b9..1ca8ff77 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -657,6 +657,24 @@ struct gpu_ops { | |||
657 | struct { | 657 | struct { |
658 | int (*init)(struct gk20a *g); | 658 | int (*init)(struct gk20a *g); |
659 | } bios; | 659 | } bios; |
660 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
661 | struct { | ||
662 | int (*enable_snapshot)(struct channel_gk20a *ch, | ||
663 | struct gk20a_cs_snapshot_client *client); | ||
664 | void (*disable_snapshot)(struct gr_gk20a *gr); | ||
665 | int (*check_data_available)(struct channel_gk20a *ch, | ||
666 | u32 *pending, | ||
667 | bool *hw_overflow); | ||
668 | void (*set_handled_snapshots)(struct gk20a *g, u32 num); | ||
669 | u32 (*allocate_perfmon_ids)(struct gk20a_cs_snapshot *data, | ||
670 | u32 count); | ||
671 | u32 (*release_perfmon_ids)(struct gk20a_cs_snapshot *data, | ||
672 | u32 start, | ||
673 | u32 count); | ||
674 | int (*detach_snapshot)(struct channel_gk20a *ch, | ||
675 | struct gk20a_cs_snapshot_client *client); | ||
676 | } css; | ||
677 | #endif | ||
660 | }; | 678 | }; |
661 | 679 | ||
662 | struct nvgpu_bios_ucode { | 680 | struct nvgpu_bios_ucode { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 2a351bc3..c337a74a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -603,16 +603,16 @@ int gr_gk20a_halt_pipe(struct gk20a *g); | |||
603 | int gr_gk20a_debugfs_init(struct gk20a *g); | 603 | int gr_gk20a_debugfs_init(struct gk20a *g); |
604 | 604 | ||
605 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 605 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
606 | int gr_gk20a_css_attach(struct gk20a *g, /* in - main hw structure */ | 606 | int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */ |
607 | u32 dmabuf_fd, /* in - dma mapped memory */ | 607 | u32 dmabuf_fd, /* in - dma mapped memory */ |
608 | u32 perfmon_id_count, /* in - number of perfmons*/ | 608 | u32 perfmon_id_count, /* in - number of perfmons*/ |
609 | u32 *perfmon_id_start, /* out- index of first pm */ | 609 | u32 *perfmon_id_start, /* out- index of first pm */ |
610 | /* out - pointer to client data used in later */ | 610 | /* out - pointer to client data used in later */ |
611 | struct gk20a_cs_snapshot_client **css_client); | 611 | struct gk20a_cs_snapshot_client **css_client); |
612 | 612 | ||
613 | int gr_gk20a_css_detach(struct gk20a *g, | 613 | int gr_gk20a_css_detach(struct channel_gk20a *ch, |
614 | struct gk20a_cs_snapshot_client *css_client); | 614 | struct gk20a_cs_snapshot_client *css_client); |
615 | int gr_gk20a_css_flush(struct gk20a *g, | 615 | int gr_gk20a_css_flush(struct channel_gk20a *ch, |
616 | struct gk20a_cs_snapshot_client *css_client); | 616 | struct gk20a_cs_snapshot_client *css_client); |
617 | 617 | ||
618 | void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g); | 618 | void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 4da7ffad..550dffa6 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include "hw_proj_gk20a.h" | 32 | #include "hw_proj_gk20a.h" |
33 | #include "tsg_gk20a.h" | 33 | #include "tsg_gk20a.h" |
34 | #include "dbg_gpu_gk20a.h" | 34 | #include "dbg_gpu_gk20a.h" |
35 | #include "css_gr_gk20a.h" | ||
35 | 36 | ||
36 | static struct gpu_ops gk20a_ops = { | 37 | static struct gpu_ops gk20a_ops = { |
37 | .clock_gating = { | 38 | .clock_gating = { |
@@ -157,6 +158,9 @@ int gk20a_init_hal(struct gk20a *g) | |||
157 | gk20a_init_dbg_session_ops(gops); | 158 | gk20a_init_dbg_session_ops(gops); |
158 | gk20a_init_therm_ops(gops); | 159 | gk20a_init_therm_ops(gops); |
159 | gk20a_init_tsg_ops(gops); | 160 | gk20a_init_tsg_ops(gops); |
161 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
162 | gk20a_init_css_ops(gops); | ||
163 | #endif | ||
160 | gops->name = "gk20a"; | 164 | gops->name = "gk20a"; |
161 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; | 165 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; |
162 | gops->get_litter_value = gk20a_get_litter_value; | 166 | gops->get_litter_value = gk20a_get_litter_value; |
diff --git a/drivers/gpu/nvgpu/gm206/hal_gm206.c b/drivers/gpu/nvgpu/gm206/hal_gm206.c index 3c6897ea..6b43c8e9 100644 --- a/drivers/gpu/nvgpu/gm206/hal_gm206.c +++ b/drivers/gpu/nvgpu/gm206/hal_gm206.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include "gr_gm206.h" | 41 | #include "gr_gm206.h" |
42 | #include "hw_proj_gm206.h" | 42 | #include "hw_proj_gm206.h" |
43 | #include "gk20a/dbg_gpu_gk20a.h" | 43 | #include "gk20a/dbg_gpu_gk20a.h" |
44 | #include "gk20a/css_gr_gk20a.h" | ||
44 | 45 | ||
45 | static struct gpu_ops gm206_ops = { | 46 | static struct gpu_ops gm206_ops = { |
46 | .clock_gating = { | 47 | .clock_gating = { |
@@ -199,6 +200,9 @@ int gm206_init_hal(struct gk20a *g) | |||
199 | gm20b_init_cde_ops(gops); | 200 | gm20b_init_cde_ops(gops); |
200 | gm20b_init_therm_ops(gops); | 201 | gm20b_init_therm_ops(gops); |
201 | gk20a_init_tsg_ops(gops); | 202 | gk20a_init_tsg_ops(gops); |
203 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
204 | gk20a_init_css_ops(gops); | ||
205 | #endif | ||
202 | gm206_init_bios(gops); | 206 | gm206_init_bios(gops); |
203 | switch(ver){ | 207 | switch(ver){ |
204 | case GK20A_GPUID_GM206: | 208 | case GK20A_GPUID_GM206: |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 34e3b605..e30ca96f 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "therm_gm20b.h" | 36 | #include "therm_gm20b.h" |
37 | #include "hw_proj_gm20b.h" | 37 | #include "hw_proj_gm20b.h" |
38 | #include "gk20a/dbg_gpu_gk20a.h" | 38 | #include "gk20a/dbg_gpu_gk20a.h" |
39 | #include "gk20a/css_gr_gk20a.h" | ||
39 | 40 | ||
40 | #define FUSE_OPT_PRIV_SEC_DIS_0 0x264 | 41 | #define FUSE_OPT_PRIV_SEC_DIS_0 0x264 |
41 | #define PRIV_SECURITY_DISABLE 0x01 | 42 | #define PRIV_SECURITY_DISABLE 0x01 |
@@ -226,6 +227,9 @@ int gm20b_init_hal(struct gk20a *g) | |||
226 | gm20b_init_cde_ops(gops); | 227 | gm20b_init_cde_ops(gops); |
227 | gm20b_init_therm_ops(gops); | 228 | gm20b_init_therm_ops(gops); |
228 | gk20a_init_tsg_ops(gops); | 229 | gk20a_init_tsg_ops(gops); |
230 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
231 | gk20a_init_css_ops(gops); | ||
232 | #endif | ||
229 | gops->name = "gm20b"; | 233 | gops->name = "gm20b"; |
230 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; | 234 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; |
231 | gops->get_litter_value = gm20b_get_litter_value; | 235 | gops->get_litter_value = gm20b_get_litter_value; |
diff --git a/drivers/gpu/nvgpu/vgpu/css_vgpu.c b/drivers/gpu/nvgpu/vgpu/css_vgpu.c new file mode 100644 index 00000000..486d3e88 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/css_vgpu.c | |||
@@ -0,0 +1,221 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
17 | |||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/nvhost.h> | ||
20 | #include <linux/tegra-ivc.h> | ||
21 | #include <linux/tegra_vgpu.h> | ||
22 | |||
23 | #include "gk20a/gk20a.h" | ||
24 | #include "gk20a/channel_gk20a.h" | ||
25 | #include "gk20a/platform_gk20a.h" | ||
26 | #include "gk20a/css_gr_gk20a.h" | ||
27 | #include "vgpu.h" | ||
28 | |||
/*
 * Bookkeeping for a hypervisor-backed hardware snapshot buffer.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * structure; the code below keeps its state in struct gk20a_cs_snapshot and
 * in the file-level css_cookie instead. Confirm whether it is still needed.
 */
struct vgpu_hw_snapshot_buffer {
	/* mempool reservation handle */
	struct tegra_hv_ivm_cookie *cookie;
	/* mapped address of the buffer */
	void *buf;
	/* presumably one past the last FIFO entry -- TODO confirm */
	struct gk20a_cs_snapshot_fifo_entry *end;
	/* presumably the consumer (get) position -- TODO confirm */
	struct gk20a_cs_snapshot_fifo_entry *src_get;
	/* presumably the producer (put) position -- TODO confirm */
	struct gk20a_cs_snapshot_fifo_entry *src_put;
};
36 | |||
/*
 * Reservation handle of the mempool backing the snapshot buffer. It is
 * written by vgpu_css_init_snapshot_buffer() and consumed by
 * vgpu_css_release_snapshot_buffer(); nothing outside this file needs it,
 * so keep it out of the global symbol namespace.
 */
static struct tegra_hv_ivm_cookie *css_cookie;
38 | |||
39 | int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr) | ||
40 | { | ||
41 | struct gk20a *g = gr->g; | ||
42 | struct device *dev = g->dev; | ||
43 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
44 | struct device_node *np = dev->of_node; | ||
45 | struct of_phandle_args args; | ||
46 | struct device_node *hv_np; | ||
47 | void *buf = NULL; | ||
48 | u32 mempool; | ||
49 | int err; | ||
50 | |||
51 | gk20a_dbg_fn(""); | ||
52 | |||
53 | if (data->hw_snapshot) | ||
54 | return 0; | ||
55 | |||
56 | err = of_parse_phandle_with_fixed_args(np, | ||
57 | "mempool-css", 1, 0, &args); | ||
58 | if (err) { | ||
59 | dev_info(dev_from_gk20a(g), "dt missing mempool-css\n"); | ||
60 | goto fail; | ||
61 | } | ||
62 | |||
63 | hv_np = args.np; | ||
64 | mempool = args.args[0]; | ||
65 | css_cookie = tegra_hv_mempool_reserve(hv_np, mempool); | ||
66 | if (IS_ERR(css_cookie)) { | ||
67 | dev_info(dev_from_gk20a(g), | ||
68 | "mempool %u reserve failed\n", mempool); | ||
69 | err = -EINVAL; | ||
70 | goto fail; | ||
71 | } | ||
72 | |||
73 | /* Make sure buffer size is large enough */ | ||
74 | if (css_cookie->size < CSS_MIN_HW_SNAPSHOT_SIZE) { | ||
75 | dev_info(dev_from_gk20a(g), "mempool size %lld too small\n", | ||
76 | css_cookie->size); | ||
77 | err = -ENOMEM; | ||
78 | goto fail; | ||
79 | } | ||
80 | |||
81 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) | ||
82 | buf = ioremap_cached(css_cookie->ipa, css_cookie->size); | ||
83 | #else | ||
84 | buf = ioremap_cache(css_cookie->ipa, css_cookie->size); | ||
85 | #endif | ||
86 | if (!buf) { | ||
87 | dev_info(dev_from_gk20a(g), "ioremap_cache failed\n"); | ||
88 | err = -EINVAL; | ||
89 | goto fail; | ||
90 | } | ||
91 | |||
92 | data->hw_snapshot = buf; | ||
93 | data->hw_end = data->hw_snapshot + | ||
94 | css_cookie->size / sizeof(struct gk20a_cs_snapshot_fifo_entry); | ||
95 | data->hw_get = data->hw_snapshot; | ||
96 | memset(data->hw_snapshot, 0xff, css_cookie->size); | ||
97 | return 0; | ||
98 | fail: | ||
99 | if (!IS_ERR_OR_NULL(css_cookie)) | ||
100 | tegra_hv_mempool_unreserve(css_cookie); | ||
101 | return err; | ||
102 | } | ||
103 | |||
104 | static void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr) | ||
105 | { | ||
106 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
107 | |||
108 | if (!data->hw_snapshot) | ||
109 | return; | ||
110 | |||
111 | iounmap(data->hw_snapshot); | ||
112 | data->hw_snapshot = NULL; | ||
113 | |||
114 | tegra_hv_mempool_unreserve(css_cookie); | ||
115 | |||
116 | gk20a_dbg_info("cyclestats(vgpu): buffer for snapshots released\n"); | ||
117 | } | ||
118 | |||
119 | static int vgpu_css_flush_snapshots(struct channel_gk20a *ch, | ||
120 | u32 *pending, bool *hw_overflow) | ||
121 | { | ||
122 | struct gk20a *g = ch->g; | ||
123 | struct tegra_vgpu_cmd_msg msg = {}; | ||
124 | struct tegra_vgpu_channel_cyclestats_snapshot_params *p; | ||
125 | struct gr_gk20a *gr = &g->gr; | ||
126 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
127 | int err; | ||
128 | |||
129 | gk20a_dbg_fn(""); | ||
130 | |||
131 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; | ||
132 | msg.handle = vgpu_get_handle(g); | ||
133 | p = &msg.params.cyclestats_snapshot; | ||
134 | p->handle = ch->virt_ctx; | ||
135 | p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH; | ||
136 | p->buf_info = (uintptr_t)data->hw_get - (uintptr_t)data->hw_snapshot; | ||
137 | |||
138 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
139 | |||
140 | err = (err || msg.ret) ? -1 : 0; | ||
141 | |||
142 | *pending = p->buf_info; | ||
143 | *hw_overflow = p->hw_overflow; | ||
144 | |||
145 | return err; | ||
146 | } | ||
147 | |||
148 | static int vgpu_css_attach(struct channel_gk20a *ch, | ||
149 | struct gk20a_cs_snapshot_client *cs_client) | ||
150 | { | ||
151 | struct gk20a *g = ch->g; | ||
152 | struct tegra_vgpu_cmd_msg msg = {}; | ||
153 | struct tegra_vgpu_channel_cyclestats_snapshot_params *p = | ||
154 | &msg.params.cyclestats_snapshot; | ||
155 | int err; | ||
156 | |||
157 | gk20a_dbg_fn(""); | ||
158 | |||
159 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; | ||
160 | msg.handle = vgpu_get_handle(g); | ||
161 | p->handle = ch->virt_ctx; | ||
162 | p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH; | ||
163 | p->perfmon_count = cs_client->perfmon_count; | ||
164 | |||
165 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
166 | err = err ? err : msg.ret; | ||
167 | if (err) | ||
168 | gk20a_err(dev_from_gk20a(g), "%s failed", __func__); | ||
169 | else | ||
170 | cs_client->perfmon_start = p->perfmon_start; | ||
171 | |||
172 | return err; | ||
173 | } | ||
174 | |||
175 | static int vgpu_css_detach(struct channel_gk20a *ch, | ||
176 | struct gk20a_cs_snapshot_client *cs_client) | ||
177 | { | ||
178 | struct gk20a *g = ch->g; | ||
179 | struct tegra_vgpu_cmd_msg msg = {}; | ||
180 | struct tegra_vgpu_channel_cyclestats_snapshot_params *p = | ||
181 | &msg.params.cyclestats_snapshot; | ||
182 | int err; | ||
183 | |||
184 | gk20a_dbg_fn(""); | ||
185 | |||
186 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; | ||
187 | msg.handle = vgpu_get_handle(g); | ||
188 | p->handle = ch->virt_ctx; | ||
189 | p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH; | ||
190 | p->perfmon_start = cs_client->perfmon_start; | ||
191 | p->perfmon_count = cs_client->perfmon_count; | ||
192 | |||
193 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
194 | err = err ? err : msg.ret; | ||
195 | if (err) | ||
196 | gk20a_err(dev_from_gk20a(g), "%s failed", __func__); | ||
197 | |||
198 | return err; | ||
199 | } | ||
200 | |||
201 | static int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch, | ||
202 | struct gk20a_cs_snapshot_client *cs_client) | ||
203 | { | ||
204 | int ret; | ||
205 | |||
206 | ret = vgpu_css_attach(ch, cs_client); | ||
207 | if (ret) | ||
208 | return ret; | ||
209 | |||
210 | ret = vgpu_css_init_snapshot_buffer(&ch->g->gr); | ||
211 | return ret; | ||
212 | } | ||
213 | |||
214 | void vgpu_init_css_ops(struct gpu_ops *gops) | ||
215 | { | ||
216 | gops->css.enable_snapshot = vgpu_css_enable_snapshot_buffer; | ||
217 | gops->css.disable_snapshot = vgpu_css_release_snapshot_buffer; | ||
218 | gops->css.check_data_available = vgpu_css_flush_snapshots; | ||
219 | gops->css.detach_snapshot = vgpu_css_detach; | ||
220 | } | ||
221 | #endif /* CONFIG_GK20A_CYCLE_STATS */ | ||
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 783b2f55..89223091 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c | |||
@@ -875,6 +875,10 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) | |||
875 | 875 | ||
876 | gr->g = g; | 876 | gr->g = g; |
877 | 877 | ||
878 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
879 | mutex_init(&g->gr.cs_lock); | ||
880 | #endif | ||
881 | |||
878 | err = vgpu_gr_init_gr_config(g, gr); | 882 | err = vgpu_gr_init_gr_config(g, gr); |
879 | if (err) | 883 | if (err) |
880 | goto clean_up; | 884 | goto clean_up; |
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 27d98eb8..bd332583 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c | |||
@@ -334,6 +334,9 @@ void vgpu_init_hal_common(struct gk20a *g) | |||
334 | vgpu_init_dbg_session_ops(gops); | 334 | vgpu_init_dbg_session_ops(gops); |
335 | vgpu_init_fecs_trace_ops(gops); | 335 | vgpu_init_fecs_trace_ops(gops); |
336 | vgpu_init_tsg_ops(gops); | 336 | vgpu_init_tsg_ops(gops); |
337 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
338 | vgpu_init_css_ops(gops); | ||
339 | #endif | ||
337 | gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics; | 340 | gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics; |
338 | gops->read_ptimer = vgpu_read_ptimer; | 341 | gops->read_ptimer = vgpu_read_ptimer; |
339 | } | 342 | } |
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h index 6f1059b8..4a7a6b6c 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/vgpu/vgpu.h | |||
@@ -85,6 +85,9 @@ void vgpu_init_ltc_ops(struct gpu_ops *gops); | |||
85 | void vgpu_init_mm_ops(struct gpu_ops *gops); | 85 | void vgpu_init_mm_ops(struct gpu_ops *gops); |
86 | void vgpu_init_debug_ops(struct gpu_ops *gops); | 86 | void vgpu_init_debug_ops(struct gpu_ops *gops); |
87 | void vgpu_init_tsg_ops(struct gpu_ops *gops); | 87 | void vgpu_init_tsg_ops(struct gpu_ops *gops); |
88 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
89 | void vgpu_init_css_ops(struct gpu_ops *gops); | ||
90 | #endif | ||
88 | int vgpu_init_mm_support(struct gk20a *g); | 91 | int vgpu_init_mm_support(struct gk20a *g); |
89 | int vgpu_init_gr_support(struct gk20a *g); | 92 | int vgpu_init_gr_support(struct gk20a *g); |
90 | int vgpu_init_fifo_support(struct gk20a *g); | 93 | int vgpu_init_fifo_support(struct gk20a *g); |
@@ -161,6 +164,11 @@ static inline void vgpu_init_mm_ops(struct gpu_ops *gops) | |||
161 | static inline void vgpu_init_debug_ops(struct gpu_ops *gops) | 164 | static inline void vgpu_init_debug_ops(struct gpu_ops *gops) |
162 | { | 165 | { |
163 | } | 166 | } |
167 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
/* Stub variant: installs no cyclestats snapshot ops. */
static inline void vgpu_init_css_ops(struct gpu_ops *gops)
{
	/* intentionally empty */
}
171 | #endif | ||
164 | static inline int vgpu_init_mm_support(struct gk20a *g) | 172 | static inline int vgpu_init_mm_support(struct gk20a *g) |
165 | { | 173 | { |
166 | return -ENOSYS; | 174 | return -ENOSYS; |
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h index fdab9b06..be8b9ad1 100644 --- a/include/linux/tegra_vgpu.h +++ b/include/linux/tegra_vgpu.h | |||
@@ -97,6 +97,7 @@ enum { | |||
97 | TEGRA_VGPU_CMD_SET_POWERGATE = 60, | 97 | TEGRA_VGPU_CMD_SET_POWERGATE = 60, |
98 | TEGRA_VGPU_CMD_SET_GPU_CLK_RATE = 61, | 98 | TEGRA_VGPU_CMD_SET_GPU_CLK_RATE = 61, |
99 | TEGRA_VGPU_CMD_GET_CONSTANTS = 62, | 99 | TEGRA_VGPU_CMD_GET_CONSTANTS = 62, |
100 | TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT = 63, | ||
100 | }; | 101 | }; |
101 | 102 | ||
102 | struct tegra_vgpu_connect_params { | 103 | struct tegra_vgpu_connect_params { |
@@ -437,6 +438,15 @@ struct tegra_vgpu_constants_params { | |||
437 | u16 gpc_tpc_mask[TEGRA_VGPU_MAX_GPC_COUNT]; | 438 | u16 gpc_tpc_mask[TEGRA_VGPU_MAX_GPC_COUNT]; |
438 | }; | 439 | }; |
439 | 440 | ||
441 | struct tegra_vgpu_channel_cyclestats_snapshot_params { | ||
442 | u64 handle; | ||
443 | u32 perfmon_start; | ||
444 | u32 perfmon_count; | ||
445 | u32 buf_info; /* client->srvr: get ptr; srvr->client: num pending */ | ||
446 | u8 subcmd; | ||
447 | u8 hw_overflow; | ||
448 | }; | ||
449 | |||
440 | struct tegra_vgpu_cmd_msg { | 450 | struct tegra_vgpu_cmd_msg { |
441 | u32 cmd; | 451 | u32 cmd; |
442 | int ret; | 452 | int ret; |
@@ -481,6 +491,7 @@ struct tegra_vgpu_cmd_msg { | |||
481 | struct tegra_vgpu_set_powergate_params set_powergate; | 491 | struct tegra_vgpu_set_powergate_params set_powergate; |
482 | struct tegra_vgpu_gpu_clk_rate_params gpu_clk_rate; | 492 | struct tegra_vgpu_gpu_clk_rate_params gpu_clk_rate; |
483 | struct tegra_vgpu_constants_params constants; | 493 | struct tegra_vgpu_constants_params constants; |
494 | struct tegra_vgpu_channel_cyclestats_snapshot_params cyclestats_snapshot; | ||
484 | char padding[192]; | 495 | char padding[192]; |
485 | } params; | 496 | } params; |
486 | }; | 497 | }; |