Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c  759
1 file changed, 759 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
new file mode 100644
index 00000000..7509acd7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -0,0 +1,759 @@
/*
 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
 *
 * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/dma-buf.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>

#include "gk20a.h"
#include "hw_perf_gk20a.h"
#include "hw_mc_gk20a.h"


/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
struct gk20a_cs_snapshot_fifo {
        /* layout description of the buffer */
        u32 start;
        u32 end;

        /* snafu bits */
        u32 hw_overflow_events_occured;
        u32 sw_overflow_events_occured;

        /* the kernel copies new entries to put and
         * increments put; if put == get then
         * overflowEventsOccured++
         */
        u32 put;
        u32 _reserved10;
        u32 _reserved11;
        u32 _reserved12;

        /* the driver/client reads from get until
         * put == get, incrementing get as it reads */
        u32 get;
        u32 _reserved20;
        u32 _reserved21;
        u32 _reserved22;

        /* unused */
        u32 _reserved30;
        u32 _reserved31;
        u32 _reserved32;
        u32 _reserved33;
};

/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
struct gk20a_cs_snapshot_fifo_entry {
        /* global 48 timestamp */
        u32 timestamp31_00:32;
        u32 timestamp39_32:8;

        /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
        u32 perfmon_id:8;

        /* typically samples_counter is wired to #pmtrigger count */
        u32 samples_counter:12;

        /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
        u32 ds:1;
        u32 sz:1;
        u32 zero0:1;
        u32 zero1:1;

        /* counter results */
        u32 event_cnt:32;
        u32 trigger0_cnt:32;
        u32 trigger1_cnt:32;
        u32 sample_cnt:32;

        /* Local PmTrigger results for Maxwell+ or padding otherwise */
        u16 local_trigger_b_count:16;
        u16 book_mark_b:16;
        u16 local_trigger_a_count:16;
        u16 book_mark_a:16;
};


/* cycle stats snapshot client data (e.g. associated with channel) */
struct gk20a_cs_snapshot_client {
        struct list_head list;
        u32 dmabuf_fd;
        struct dma_buf *dma_handler;
        struct gk20a_cs_snapshot_fifo *snapshot;
        u32 snapshot_size;
        u32 perfmon_start;
        u32 perfmon_count;
};

/* check whether the given perfmon is owned by the client */
#define CONTAINS_PERFMON(cl, pm) \
        ((cl)->perfmon_start <= (pm) && \
         ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)

/* the minimal size of HW buffer - should be enough to avoid HW overflows */
#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)

/* the minimal size of client buffer */
#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
        (sizeof(struct gk20a_cs_snapshot_fifo) + \
         sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)

/* address of fifo entry by offset */
#define CSS_FIFO_ENTRY(fifo, offs) \
        ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))

/* calculate area capacity in number of fifo entries */
#define CSS_FIFO_ENTRY_CAPACITY(s) \
        (((s) - sizeof(struct gk20a_cs_snapshot_fifo)) \
                / sizeof(struct gk20a_cs_snapshot_fifo_entry))

/* perfmon IDs below this value are reserved to indicate failures with data */
#define CSS_FIRST_PERFMON_ID 32
/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
#define CSS_MAX_PERFMON_IDS 256

/* this type is used for storing bits in perfmon mask */
typedef u32 css_perfmon_t;

/* local definitions to avoid hardcoded sizes and shifts */
#define PM_BITS                 (sizeof(css_perfmon_t) * BITS_PER_BYTE)
#define PM_BITS_MASK            (PM_BITS - 1)

#define PM_BITMAP_SIZE          ((CSS_MAX_PERFMON_IDS + PM_BITS - 1) / PM_BITS)

#define PM_SLOT(i)              ((i) / PM_BITS)
#define PM_SHIFT(i)             ((i) & PM_BITS_MASK)
#define PM_BIT(i)               (1u << PM_SHIFT(i))

#define CSS_PERFMON_GET(p, i)   (1 == ((p[PM_SLOT(i)] >> PM_SHIFT(i)) & 1))
#define CSS_PERFMON_USE(p, i)   (p[PM_SLOT(i)] |= PM_BIT(i))
#define CSS_PERFMON_REL(p, i)   (p[PM_SLOT(i)] &= ~PM_BIT(i))

/* cycle stats snapshot control structure for one HW entry and many clients */
struct gk20a_cs_snapshot {
        css_perfmon_t perfmon_ids[PM_BITMAP_SIZE];
        struct list_head clients;
        struct mem_desc hw_memdesc;
        /* pointer to allocated cpu_va memory where the GPU places data */
        struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
        struct gk20a_cs_snapshot_fifo_entry *hw_end;
        struct gk20a_cs_snapshot_fifo_entry *hw_get;
};

/* reports whether the hw queue overflowed */
static inline bool css_hw_get_overflow_status(struct gk20a *g)
{
        const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
        return st == (gk20a_readl(g, perf_pmasys_control_r()) & st);
}

/* returns how many snapshot entries are pending */
static inline u32 css_hw_get_pending_snapshots(struct gk20a *g)
{
        return gk20a_readl(g, perf_pmasys_mem_bytes_r()) /
                        sizeof(struct gk20a_cs_snapshot_fifo_entry);
}

/* informs hw how many snapshots have been processed (frees up fifo space) */
static inline void css_hw_set_handled_snapshots(struct gk20a *g, u32 done)
{
        if (done > 0) {
                gk20a_writel(g, perf_pmasys_mem_bump_r(),
                        done * sizeof(struct gk20a_cs_snapshot_fifo_entry));
        }
}

/* disable streaming to memory */
static void css_hw_reset_streaming(struct gk20a *g)
{
        u32 engine_status;
        u32 old_pmc = gk20a_readl(g, mc_enable_r());

        /* reset the perfmon */
        gk20a_writel(g, mc_enable_r(),
                        old_pmc & ~mc_enable_perfmon_enabled_f());
        gk20a_writel(g, mc_enable_r(), old_pmc);

        /* RBUFEMPTY must be set -- otherwise we'll pick up */
        /* snapshots that were queued up earlier */
        engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
        WARN_ON(0 == (engine_status
                        & perf_pmasys_enginestatus_rbufempty_empty_f()));

        /* turn off writes */
        gk20a_writel(g, perf_pmasys_control_r(),
                        perf_pmasys_control_membuf_clear_status_doit_f());

        /* mark all pending snapshots as handled */
        css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g));
}

/*
 * WARNING: all css_gr_XXX functions are local and expected to be called
 * from locked context (protected by cs_lock)
 */

static int css_gr_create_shared_data(struct gr_gk20a *gr)
{
        struct gk20a_cs_snapshot *data;

        if (gr->cs_data)
                return 0;

        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        INIT_LIST_HEAD(&data->clients);
        gr->cs_data = data;

        return 0;
}

static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size)
{
        struct gk20a *g = gr->g;
        struct gk20a_cs_snapshot *data = gr->cs_data;
        int ret;

        u32 virt_addr_lo;
        u32 virt_addr_hi;
        u32 inst_pa_page;

        if (data->hw_snapshot)
                return 0;

        if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
                snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;

        ret = gk20a_gmmu_alloc_map(&g->mm.pmu.vm, snapshot_size,
                        &data->hw_memdesc);
        if (ret)
                return ret;

        /* perf output buffer may not cross a 4GB boundary; with a separate */
        /* VA space smaller than that it won't, but check anyway */
        if (!data->hw_memdesc.cpu_va ||
                        data->hw_memdesc.size < snapshot_size ||
                        data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) {
                ret = -EFAULT;
                goto failed_allocation;
        }

        data->hw_snapshot =
                (struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va;
        data->hw_end = data->hw_snapshot +
                snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
        data->hw_get = data->hw_snapshot;
        memset(data->hw_snapshot, 0xff, snapshot_size);

        /* address and size are aligned to 32 bytes, the lowest bits read back
         * as zeros */
        virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va);
        virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va);

        css_hw_reset_streaming(g);

        gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
        gk20a_writel(g, perf_pmasys_outbaseupper_r(),
                        perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
        gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);

        /* this field is aligned to 4K */
        inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12;

        /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
         * should be written last */
        gk20a_writel(g, perf_pmasys_mem_block_r(),
                        perf_pmasys_mem_block_base_f(inst_pa_page) |
                        perf_pmasys_mem_block_valid_true_f() |
                        perf_pmasys_mem_block_target_lfb_f());

        gk20a_dbg_info("cyclestats: buffer for hardware snapshots enabled\n");

        return 0;

failed_allocation:
        if (data->hw_memdesc.size) {
                gk20a_gmmu_unmap_free(&g->mm.pmu.vm, &data->hw_memdesc);
                memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
        }
        data->hw_snapshot = NULL;

        return ret;
}

static void css_hw_disable_snapshot(struct gr_gk20a *gr)
{
        struct gk20a *g = gr->g;
        struct gk20a_cs_snapshot *data = gr->cs_data;

        if (!data->hw_snapshot)
                return;

        css_hw_reset_streaming(g);

        gk20a_writel(g, perf_pmasys_outbase_r(), 0);
        gk20a_writel(g, perf_pmasys_outbaseupper_r(),
                        perf_pmasys_outbaseupper_ptr_f(0));
        gk20a_writel(g, perf_pmasys_outsize_r(), 0);

        gk20a_writel(g, perf_pmasys_mem_block_r(),
                        perf_pmasys_mem_block_base_f(0) |
                        perf_pmasys_mem_block_valid_false_f() |
                        perf_pmasys_mem_block_target_f(0));

        gk20a_gmmu_unmap_free(&g->mm.pmu.vm, &data->hw_memdesc);
        memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
        data->hw_snapshot = NULL;

        gk20a_dbg_info("cyclestats: buffer for hardware snapshots disabled\n");
}

static void css_gr_free_shared_data(struct gr_gk20a *gr)
{
        if (gr->cs_data) {
                /* the clients list is expected to be empty */
                css_hw_disable_snapshot(gr);

                /* release the objects */
                kfree(gr->cs_data);
                gr->cs_data = NULL;
        }
}


static struct gk20a_cs_snapshot_client*
css_gr_search_client(struct list_head *clients, u32 perfmon)
{
        struct list_head *pos;

        list_for_each(pos, clients) {
                struct gk20a_cs_snapshot_client *client =
                        container_of(pos,
                                struct gk20a_cs_snapshot_client, list);
                if (CONTAINS_PERFMON(client, perfmon))
                        return client;
        }

        return NULL;
}

static int css_gr_flush_snapshots(struct gr_gk20a *gr)
{
        struct gk20a *g = gr->g;
        struct gk20a_cs_snapshot *css = gr->cs_data;
        struct gk20a_cs_snapshot_client *cur;
        u32 pending;

        /* variables for iterating over HW entries */
        u32 sid;
        struct gk20a_cs_snapshot_fifo_entry *src;

        /* due to data sharing with userspace we are allowed to update only */
        /* the overflow counters and the put field in the fifo header */
        struct gk20a_cs_snapshot_fifo *dst;
        struct gk20a_cs_snapshot_fifo_entry *dst_get;
        struct gk20a_cs_snapshot_fifo_entry *dst_put;
        struct gk20a_cs_snapshot_fifo_entry *dst_head;
        struct gk20a_cs_snapshot_fifo_entry *dst_tail;

        if (!css)
                return -EINVAL;

        if (!css->hw_snapshot)
                return -EINVAL;

        if (list_empty(&css->clients))
                return -EBADF;

        /* check data available */
        pending = css_hw_get_pending_snapshots(g);
        if (!pending)
                return 0;

        if (css_hw_get_overflow_status(g)) {
                struct list_head *pos;

                list_for_each(pos, &css->clients) {
                        cur = container_of(pos,
                                struct gk20a_cs_snapshot_client, list);
                        cur->snapshot->hw_overflow_events_occured++;
                }

                gk20a_warn(dev_from_gk20a(g),
                        "cyclestats: hardware overflow detected\n");
        }

        /* process all items in the HW buffer */
        sid = 0;
        cur = NULL;
        dst = NULL;
        dst_put = NULL;
        src = css->hw_get;

        /* process all completed records */
        while (sid < pending && 0 == src->zero0) {
                /* we may have a new perfmon_id which requires switching */
                /* to a new client -> forget the current one */
                if (cur && !CONTAINS_PERFMON(cur, src->perfmon_id)) {
                        dst->put = (char *)dst_put - (char *)dst;
                        dst = NULL;
                        cur = NULL;
                }

                /* now we have to select a new current client; */
                /* the client selection rate depends on experiment activity, */
                /* but on Android it usually happens only 1-2 times */
                if (!cur) {
                        cur = css_gr_search_client(&css->clients,
                                src->perfmon_id);
                        if (cur) {
                                /* found - setup all required data */
                                dst = cur->snapshot;
                                dst_get = CSS_FIFO_ENTRY(dst, dst->get);
                                dst_put = CSS_FIFO_ENTRY(dst, dst->put);
                                dst_head = CSS_FIFO_ENTRY(dst, dst->start);
                                dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1;
                        } else {
                                /* client not found - skipping this entry */
                                gk20a_warn(dev_from_gk20a(g),
                                        "cyclestats: orphaned perfmon %u\n",
                                        src->perfmon_id);
                                goto next_hw_fifo_entry;
                        }
                }

                /* check for software overflows */
                if (dst_put + 1 == dst_get ||
                        (dst_put == dst_tail && dst_get == dst_head)) {
                        /* no data copy, no pointer updates */
                        dst->sw_overflow_events_occured++;
                        gk20a_warn(dev_from_gk20a(g),
                                "cyclestats: perfmon %u soft overflow\n",
                                src->perfmon_id);
                } else {
                        *dst_put = *src;
                        if (dst_put == dst_tail)
                                dst_put = dst_head;
                        else
                                dst_put++;
                }

next_hw_fifo_entry:
                sid++;
                if (++src >= css->hw_end)
                        src = css->hw_snapshot;
        }

        /* update client put pointer if necessary */
        if (cur && dst)
                dst->put = (char *)dst_put - (char *)dst;

        /* reset the HW buffer after processing, taking wrapping into account */
        if (css->hw_get < src) {
                memset(css->hw_get, 0xff, (src - css->hw_get) * sizeof(*src));
        } else {
                memset(css->hw_snapshot, 0xff,
                        (src - css->hw_snapshot) * sizeof(*src));
                memset(css->hw_get, 0xff,
                        (css->hw_end - css->hw_get) * sizeof(*src));
        }
        gr->cs_data->hw_get = src;
        css_hw_set_handled_snapshots(g, sid);
        if (pending != sid) {
                /* not all entries were processed correctly; some problems */
                /* are reported as overflows, some as orphaned perfmons, */
                /* but it is better to also notify with a summary */
                gk20a_warn(dev_from_gk20a(g),
                        "cyclestats: done %u from %u entries\n",
                        sid, pending);
        }

        return 0;
}

static u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
                                       u32 count)
{
        u32 *pids = data->perfmon_ids;
        u32 f;
        u32 e = CSS_MAX_PERFMON_IDS - count;

        if (!count || count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID)
                return 0;

        for (f = CSS_FIRST_PERFMON_ID; f < e; f++) {
                u32 slots = 0;
                u32 cur;
                u32 end = f + count;

                /* look for a contiguous hole [f, f+count) of unused bits */
                for (cur = f; cur < end; cur++) {
                        if (CSS_PERFMON_GET(pids, cur))
                                break;
                        slots++;
                }

                if (count == slots) {
                        /* we found a hole of unused bits with the required */
                        /* length -> occupy it for our perfmon IDs */
                        for (cur = f; cur < end; cur++)
                                CSS_PERFMON_USE(pids, cur);

                        return f;
                }
        }

        return 0;
}

static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
                                      u32 start,
                                      u32 count)
{
        u32 *pids = data->perfmon_ids;
        u32 end = start + count;
        u32 cnt = 0;

        if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) {
                u32 i;
                for (i = start; i < end; i++) {
                        if (CSS_PERFMON_GET(pids, i)) {
                                CSS_PERFMON_REL(pids, i);
                                cnt++;
                        }
                }
        }

        return cnt;
}


static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
                                   struct gk20a_cs_snapshot_client *client)
{
        int ret = 0;

        list_del(&client->list);
        if (client->perfmon_start && client->perfmon_count) {
                if (client->perfmon_count != css_gr_release_perfmon_ids(data,
                                client->perfmon_start, client->perfmon_count))
                        ret = -EINVAL;
        }
        if (client->dma_handler) {
                dma_buf_vunmap(client->dma_handler, client->snapshot);
                dma_buf_put(client->dma_handler);
        }

        kfree(client);

        return ret;
}

static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
                                     u32 dmabuf_fd, u32 perfmon_count,
                                     struct gk20a_cs_snapshot_client **client)
{
        struct gk20a_cs_snapshot_client *cur;
        int ret = 0;

        cur = kzalloc(sizeof(*cur), GFP_KERNEL);
        if (!cur) {
                ret = -ENOMEM;
                goto failed;
        }

        cur->dmabuf_fd = dmabuf_fd;
        cur->dma_handler = dma_buf_get(cur->dmabuf_fd);
        if (IS_ERR(cur->dma_handler)) {
                ret = PTR_ERR(cur->dma_handler);
                cur->dma_handler = NULL;
                goto failed;
        }

        cur->snapshot = (struct gk20a_cs_snapshot_fifo *)
                dma_buf_vmap(cur->dma_handler);
        if (!cur->snapshot) {
                ret = -ENOMEM;
                goto failed;
        }

        cur->snapshot_size = cur->dma_handler->size;
        if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
                ret = -ENOMEM;
                goto failed;
        }

        memset(cur->snapshot, 0, sizeof(*cur->snapshot));
        cur->snapshot->start = sizeof(*cur->snapshot);
        /* make sure that only whole fifo entries fit in the buffer */
        cur->snapshot->end =
                CSS_FIFO_ENTRY_CAPACITY(cur->snapshot_size)
                        * sizeof(struct gk20a_cs_snapshot_fifo_entry)
                        + sizeof(struct gk20a_cs_snapshot_fifo);
        cur->snapshot->get = cur->snapshot->start;
        cur->snapshot->put = cur->snapshot->start;

        cur->perfmon_count = perfmon_count;
        if (cur->perfmon_count) {
                cur->perfmon_start = css_gr_allocate_perfmon_ids(data,
                        cur->perfmon_count);
                if (!cur->perfmon_start) {
                        ret = -ENOENT;
                        goto failed;
                }
        }

        list_add_tail(&cur->list, &data->clients);
        *client = cur;

        return 0;

failed:
        *client = NULL;
        if (cur)
                css_gr_free_client_data(data, cur);

        return ret;
}


int gr_gk20a_css_attach(struct gk20a *g,
                        u32 dmabuf_fd,
                        u32 perfmon_count,
                        u32 *perfmon_start,
                        struct gk20a_cs_snapshot_client **cs_client)
{
        int ret = 0;
        struct gr_gk20a *gr;

        if (!g->allow_all)
                return -EACCES;
        /* we must have a placeholder to store the pointer to the client structure */
        if (!cs_client)
                return -EINVAL;

        gr = &g->gr;
        *cs_client = NULL;

        mutex_lock(&gr->cs_lock);

        ret = css_gr_create_shared_data(gr);
        if (ret)
                goto failed;

        ret = css_gr_create_client_data(gr->cs_data,
                        dmabuf_fd,
                        perfmon_count,
                        cs_client);
        if (ret)
                goto failed;

        ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size);
        if (ret)
                goto failed;

        if (perfmon_start)
                *perfmon_start = (*cs_client)->perfmon_start;

        mutex_unlock(&gr->cs_lock);

        return 0;

failed:
        if (gr->cs_data) {
                if (*cs_client) {
                        css_gr_free_client_data(gr->cs_data, *cs_client);
                        *cs_client = NULL;
                }

                if (list_empty(&gr->cs_data->clients))
                        css_gr_free_shared_data(gr);
        }
        mutex_unlock(&gr->cs_lock);

        if (perfmon_start)
                *perfmon_start = 0;

        return ret;
}

int gr_gk20a_css_detach(struct gk20a *g,
                        struct gk20a_cs_snapshot_client *cs_client)
{
        int ret = 0;
        struct gr_gk20a *gr;

        if (!g->allow_all)
                return -EACCES;

        if (!cs_client)
                return -EINVAL;

        gr = &g->gr;
        mutex_lock(&gr->cs_lock);
        if (gr->cs_data) {
                struct gk20a_cs_snapshot *data = gr->cs_data;

                ret = css_gr_free_client_data(data, cs_client);
                if (list_empty(&data->clients))
                        css_gr_free_shared_data(gr);
        } else {
                ret = -EBADF;
        }
        mutex_unlock(&gr->cs_lock);

        return ret;
}

int gr_gk20a_css_flush(struct gk20a *g,
                       struct gk20a_cs_snapshot_client *cs_client)
{
        int ret = 0;
        struct gr_gk20a *gr;

        if (!g->allow_all)
                return -EACCES;

        if (!cs_client)
                return -EINVAL;

        gr = &g->gr;
        mutex_lock(&gr->cs_lock);
        ret = css_gr_flush_snapshots(gr);
        mutex_unlock(&gr->cs_lock);

        return ret;
}

/* helper function with locking, used to clean up snapshot data from gr_gk20a.c */
void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
{
        struct gr_gk20a *gr = &g->gr;

        mutex_lock(&gr->cs_lock);
        css_gr_free_shared_data(gr);
        mutex_unlock(&gr->cs_lock);
}
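
For context, the fifo header comments above imply a simple consumer protocol on the client side of the shared buffer: read entries starting at the get offset until get catches up with put, then wrap get back to start when it reaches end. The sketch below is a minimal illustration of that consumer loop, assuming the gk20a_cs_snapshot_fifo layout and CSS_FIFO_ENTRY() macro defined in this file and a CPU mapping of the client buffer; css_client_drain_fifo and consume are hypothetical names, not part of this change.

static void css_client_drain_fifo(struct gk20a_cs_snapshot_fifo *fifo,
        void (*consume)(const struct gk20a_cs_snapshot_fifo_entry *entry))
{
        /* put and get are byte offsets from the start of the shared buffer */
        while (fifo->get != fifo->put) {
                const struct gk20a_cs_snapshot_fifo_entry *entry =
                        CSS_FIFO_ENTRY(fifo, fifo->get);

                consume(entry);

                /* advance get by one entry, wrapping back to start at end */
                fifo->get += sizeof(*entry);
                if (fifo->get >= fifo->end)
                        fifo->get = fifo->start;
        }
}

The kernel-side producer in css_gr_flush_snapshots() mirrors this layout: it writes at the put offset, wraps from the last entry slot back to the head, and bumps sw_overflow_events_occured instead of writing when put would catch up with get.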