Diffstat (limited to 'include/gk20a/css_gr_gk20a.c')
-rw-r--r--   include/gk20a/css_gr_gk20a.c   636
1 file changed, 636 insertions, 0 deletions
diff --git a/include/gk20a/css_gr_gk20a.c b/include/gk20a/css_gr_gk20a.c
new file mode 100644
index 0000000..28a3d49
--- /dev/null
+++ b/include/gk20a/css_gr_gk20a.c
@@ -0,0 +1,636 @@
/*
 * GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
 *
 * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/bitops.h>
#include <nvgpu/kmem.h>
#include <nvgpu/lock.h>
#include <nvgpu/dma.h>
#include <nvgpu/mm.h>
#include <nvgpu/sizes.h>
#include <nvgpu/barrier.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/channel.h>
#include <nvgpu/unit.h>

#include "gk20a.h"
#include "css_gr_gk20a.h"

#include <nvgpu/hw/gk20a/hw_perf_gk20a.h>

/* check whether the client owns the given perfmon id */
#define CONTAINS_PERFMON(cl, pm) \
        ((cl)->perfmon_start <= (pm) && \
        ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)

/* address of fifo entry by offset */
#define CSS_FIFO_ENTRY(fifo, offs) \
        ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))

/* calculate area capacity in number of fifo entries */
#define CSS_FIFO_ENTRY_CAPACITY(s) \
        (((s) - sizeof(struct gk20a_cs_snapshot_fifo)) \
        / sizeof(struct gk20a_cs_snapshot_fifo_entry))

/* reserved to indicate failures with data */
#define CSS_FIRST_PERFMON_ID    32
/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
#define CSS_MAX_PERFMON_IDS     256

/* reports whether the hw queue overflowed */
bool css_hw_get_overflow_status(struct gk20a *g)
{
        const u32 st = perf_pmasys_control_membuf_status_overflowed_f();
        return st == (gk20a_readl(g, perf_pmasys_control_r()) & st);
}

/* returns the number of snapshot entries pending in the hw fifo */
u32 css_hw_get_pending_snapshots(struct gk20a *g)
{
        return gk20a_readl(g, perf_pmasys_mem_bytes_r()) /
                        sizeof(struct gk20a_cs_snapshot_fifo_entry);
}

/* informs hw how many snapshots have been processed (frees up fifo space) */
void css_hw_set_handled_snapshots(struct gk20a *g, u32 done)
{
        if (done > 0) {
                gk20a_writel(g, perf_pmasys_mem_bump_r(),
                        done * sizeof(struct gk20a_cs_snapshot_fifo_entry));
        }
}

/* disable streaming to memory */
static void css_hw_reset_streaming(struct gk20a *g)
{
        u32 engine_status;

        /* reset the perfmon */
        g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_PERFMON));

        /* RBUFEMPTY must be set -- otherwise we'll pick up
         * snapshots that were queued up earlier */
        engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
        WARN_ON(0 == (engine_status
                        & perf_pmasys_enginestatus_rbufempty_empty_f()));

        /* turn off writes */
        gk20a_writel(g, perf_pmasys_control_r(),
                        perf_pmasys_control_membuf_clear_status_doit_f());

        /* mark all pending snapshots as handled */
        css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g));
}

/*
 * WARNING: all css_gr_XXX functions are local and expected to be called
 * from locked context (protected by cs_lock)
 */

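/* allocate and initialize the shared snapshot bookkeeping data, if not done yet */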
static int css_gr_create_shared_data(struct gr_gk20a *gr)
{
        struct gk20a_cs_snapshot *data;

        if (gr->cs_data)
                return 0;

        data = nvgpu_kzalloc(gr->g, sizeof(*data));
        if (!data)
                return -ENOMEM;

        nvgpu_init_list_node(&data->clients);
        gr->cs_data = data;

        return 0;
}

int css_hw_enable_snapshot(struct channel_gk20a *ch,
                        struct gk20a_cs_snapshot_client *cs_client)
{
        struct gk20a *g = ch->g;
        struct mm_gk20a *mm = &g->mm;
        struct gr_gk20a *gr = &g->gr;
        struct gk20a_cs_snapshot *data = gr->cs_data;
        u32 snapshot_size = cs_client->snapshot_size;
        int ret;

        u32 virt_addr_lo;
        u32 virt_addr_hi;
        u32 inst_pa_page;

        if (data->hw_snapshot)
                return 0;

        if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
                snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;

        ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size,
                        &data->hw_memdesc);
        if (ret)
                return ret;

        /* the perf output buffer must not cross a 4GB boundary; with a
         * separate VA space smaller than that it cannot, but check anyway */
        if (!data->hw_memdesc.cpu_va ||
            data->hw_memdesc.size < snapshot_size ||
            data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) {
                ret = -EFAULT;
                goto failed_allocation;
        }

        data->hw_snapshot =
                (struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va;
        data->hw_end = data->hw_snapshot +
                snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
        data->hw_get = data->hw_snapshot;
        memset(data->hw_snapshot, 0xff, snapshot_size);

        /* address and size are aligned to 32 bytes, the lowest bits read back
         * as zeros */
        virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va);
        virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va);

        css_hw_reset_streaming(g);

        gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
        gk20a_writel(g, perf_pmasys_outbaseupper_r(),
                        perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
        gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);

        /* this field is aligned to 4K */
        inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;

        /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
         * should be written last */
        gk20a_writel(g, perf_pmasys_mem_block_r(),
                        perf_pmasys_mem_block_base_f(inst_pa_page) |
                        nvgpu_aperture_mask(g, &mm->hwpm.inst_block,
                                perf_pmasys_mem_block_target_sys_ncoh_f(),
                                perf_pmasys_mem_block_target_sys_coh_f(),
                                perf_pmasys_mem_block_target_lfb_f()) |
                        perf_pmasys_mem_block_valid_true_f());

        nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots enabled\n");

        return 0;

failed_allocation:
        if (data->hw_memdesc.size) {
                nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
                memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
        }
        data->hw_snapshot = NULL;

        return ret;
}

void css_hw_disable_snapshot(struct gr_gk20a *gr)
{
        struct gk20a *g = gr->g;
        struct gk20a_cs_snapshot *data = gr->cs_data;

        if (!data->hw_snapshot)
                return;

        css_hw_reset_streaming(g);

        gk20a_writel(g, perf_pmasys_outbase_r(), 0);
        gk20a_writel(g, perf_pmasys_outbaseupper_r(),
                        perf_pmasys_outbaseupper_ptr_f(0));
        gk20a_writel(g, perf_pmasys_outsize_r(), 0);

        gk20a_writel(g, perf_pmasys_mem_block_r(),
                        perf_pmasys_mem_block_base_f(0) |
                        perf_pmasys_mem_block_valid_false_f() |
                        perf_pmasys_mem_block_target_f(0));

        nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
        memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
        data->hw_snapshot = NULL;

        nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots disabled\n");
}

static void css_gr_free_shared_data(struct gr_gk20a *gr)
{
        struct gk20a *g = gr->g;

        if (gr->cs_data) {
                /* the clients list is expected to be empty */
                g->ops.css.disable_snapshot(gr);

                /* release the objects */
                nvgpu_kfree(gr->g, gr->cs_data);
                gr->cs_data = NULL;
        }
}

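/* find the client that owns the given perfmon id, or NULL if none does */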
struct gk20a_cs_snapshot_client*
css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon)
{
        struct gk20a_cs_snapshot_client *client;

        nvgpu_list_for_each_entry(client, clients,
                        gk20a_cs_snapshot_client, list) {
                if (CONTAINS_PERFMON(client, perfmon))
                        return client;
        }

        return NULL;
}

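/* drain completed entries from the hw fifo into the per-client fifos */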
static int css_gr_flush_snapshots(struct channel_gk20a *ch)
{
        struct gk20a *g = ch->g;
        struct gr_gk20a *gr = &g->gr;
        struct gk20a_cs_snapshot *css = gr->cs_data;
        struct gk20a_cs_snapshot_client *cur;
        u32 pending, completed;
        bool hw_overflow;
        int err;

        /* variables for iterating over HW entries */
        u32 sid;
        struct gk20a_cs_snapshot_fifo_entry *src;

        /* because the buffer is shared with userspace, we only update the
         * overflow counters and the put field in the fifo header */
        struct gk20a_cs_snapshot_fifo *dst;
        struct gk20a_cs_snapshot_fifo_entry *dst_get;
        struct gk20a_cs_snapshot_fifo_entry *dst_put;
        struct gk20a_cs_snapshot_fifo_entry *dst_nxt;
        struct gk20a_cs_snapshot_fifo_entry *dst_head;
        struct gk20a_cs_snapshot_fifo_entry *dst_tail;

        if (!css)
                return -EINVAL;

        if (nvgpu_list_empty(&css->clients))
                return -EBADF;

        /* check data available */
        err = g->ops.css.check_data_available(ch, &pending, &hw_overflow);
        if (err)
                return err;

        if (!pending)
                return 0;

        if (hw_overflow) {
                nvgpu_list_for_each_entry(cur, &css->clients,
                                gk20a_cs_snapshot_client, list) {
                        cur->snapshot->hw_overflow_events_occured++;
                }

                nvgpu_warn(g, "cyclestats: hardware overflow detected");
        }

        /* process all items in HW buffer */
        sid = 0;
        completed = 0;
        cur = NULL;
        dst = NULL;
        dst_put = NULL;
        src = css->hw_get;

        /* process all completed records */
        while (sid < pending && 0 == src->zero0) {
                /* a new perfmon_id may require switching to a new client,
                 * so drop the current one */
                if (cur && !CONTAINS_PERFMON(cur, src->perfmon_id)) {
                        dst->put = (char *)dst_put - (char *)dst;
                        dst = NULL;
                        cur = NULL;
                }

                /* now we have to select a new current client; how often this
                 * happens depends on experiment activity, but on Android it
                 * is usually only 1-2 times */
                if (!cur) {
                        cur = css_gr_search_client(&css->clients,
                                        src->perfmon_id);
                        if (cur) {
                                /* found - setup all required data */
                                dst = cur->snapshot;
                                dst_get = CSS_FIFO_ENTRY(dst, dst->get);
                                dst_put = CSS_FIFO_ENTRY(dst, dst->put);
                                dst_head = CSS_FIFO_ENTRY(dst, dst->start);
                                dst_tail = CSS_FIFO_ENTRY(dst, dst->end);

                                dst_nxt = dst_put + 1;
                                if (dst_nxt == dst_tail)
                                        dst_nxt = dst_head;
                        } else {
                                /* client not found - skipping this entry */
                                nvgpu_warn(g, "cyclestats: orphaned perfmon %u",
                                                src->perfmon_id);
                                goto next_hw_fifo_entry;
                        }
                }

                /* check for software overflows */
                if (dst_nxt == dst_get) {
                        /* no data copy, no pointer updates */
                        dst->sw_overflow_events_occured++;
                        nvgpu_warn(g, "cyclestats: perfmon %u soft overflow",
                                        src->perfmon_id);
                } else {
                        *dst_put = *src;
                        completed++;

                        dst_put = dst_nxt++;

                        if (dst_nxt == dst_tail)
                                dst_nxt = dst_head;
                }

next_hw_fifo_entry:
                sid++;
                if (++src >= css->hw_end)
                        src = css->hw_snapshot;
        }

        /* update client put pointer if necessary */
        if (cur && dst)
                dst->put = (char *)dst_put - (char *)dst;

        /* re-set HW buffer after processing, taking wrapping into account */
        if (css->hw_get < src) {
                memset(css->hw_get, 0xff, (src - css->hw_get) * sizeof(*src));
        } else {
                memset(css->hw_snapshot, 0xff,
                                (src - css->hw_snapshot) * sizeof(*src));
                memset(css->hw_get, 0xff,
                                (css->hw_end - css->hw_get) * sizeof(*src));
        }
        gr->cs_data->hw_get = src;

        if (g->ops.css.set_handled_snapshots)
                g->ops.css.set_handled_snapshots(g, sid);

        if (completed != sid) {
                /* not all entries were processed correctly: some problems
                 * were reported as overflows and some as orphaned perfmons,
                 * but it is better to also report a summary */
                nvgpu_warn(g, "cyclestats: completed %u from %u entries",
                                completed, pending);
        }

        return 0;
}

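/* reserve a contiguous range of perfmon ids; returns the first id, or 0 on failure */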
u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
                                u32 count)
{
        unsigned long *pids = data->perfmon_ids;
        unsigned int f;

        f = bitmap_find_next_zero_area(pids, CSS_MAX_PERFMON_IDS,
                        CSS_FIRST_PERFMON_ID, count, 0);
        if (f > CSS_MAX_PERFMON_IDS)
                f = 0;
        else
                bitmap_set(pids, f, count);

        return f;
}

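/* release a previously reserved range of perfmon ids; returns the number of ids actually freed */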
u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
                                u32 start,
                                u32 count)
{
        unsigned long *pids = data->perfmon_ids;
        u32 end = start + count;
        u32 cnt = 0;

        if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) {
                bitmap_clear(pids, start, count);
                cnt = count;
        }

        return cnt;
}

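/* unlink a client from the clients list and return its perfmon ids to the pool */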
static int css_gr_free_client_data(struct gk20a *g,
                                struct gk20a_cs_snapshot *data,
                                struct gk20a_cs_snapshot_client *client)
{
        int ret = 0;

        if (client->list.next && client->list.prev)
                nvgpu_list_del(&client->list);

        if (client->perfmon_start && client->perfmon_count
                                && g->ops.css.release_perfmon_ids) {
                if (client->perfmon_count != g->ops.css.release_perfmon_ids(data,
                                client->perfmon_start, client->perfmon_count))
                        ret = -EINVAL;
        }

        return ret;
}

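/* initialize a client's snapshot fifo header, reserve its perfmon ids and add it to the clients list */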
static int css_gr_create_client_data(struct gk20a *g,
                        struct gk20a_cs_snapshot *data,
                        u32 perfmon_count,
                        struct gk20a_cs_snapshot_client *cur)
{
        /*
         * Special handling in case of rm-server.
         *
         * The client snapshot buffer is not mapped in the rm-server case;
         * it is only mapped on the guest side.
         */
        if (cur->snapshot) {
                memset(cur->snapshot, 0, sizeof(*cur->snapshot));
                cur->snapshot->start = sizeof(*cur->snapshot);
                /* make sure that a whole number of fifo entries fits here */
                cur->snapshot->end =
                        CSS_FIFO_ENTRY_CAPACITY(cur->snapshot_size)
                                * sizeof(struct gk20a_cs_snapshot_fifo_entry)
                                + sizeof(struct gk20a_cs_snapshot_fifo);
                cur->snapshot->get = cur->snapshot->start;
                cur->snapshot->put = cur->snapshot->start;
        }

        cur->perfmon_count = perfmon_count;

        /* In virtual case, perfmon ID allocation is handled by the server
         * at the time of the attach (allocate_perfmon_ids is NULL in this case)
         */
        if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) {
                cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data,
                                cur->perfmon_count);
                if (!cur->perfmon_start)
                        return -ENOENT;
        }

        nvgpu_list_add_tail(&cur->list, &data->clients);

        return 0;
}

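/* attach a cycle stats client to a channel: create the shared data, set up
 * the client fifo and perfmon ids, and enable hardware snapshots */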
int gr_gk20a_css_attach(struct channel_gk20a *ch,
                        u32 perfmon_count,
                        u32 *perfmon_start,
                        struct gk20a_cs_snapshot_client *cs_client)
{
        int ret = 0;
        struct gk20a *g = ch->g;
        struct gr_gk20a *gr;

        /* we must have a placeholder to store pointer to client structure */
        if (!cs_client)
                return -EINVAL;

        if (!perfmon_count ||
            perfmon_count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID)
                return -EINVAL;

        nvgpu_speculation_barrier();

        gr = &g->gr;

        nvgpu_mutex_acquire(&gr->cs_lock);

        ret = css_gr_create_shared_data(gr);
        if (ret)
                goto failed;

        ret = css_gr_create_client_data(g, gr->cs_data,
                        perfmon_count,
                        cs_client);
        if (ret)
                goto failed;

        ret = g->ops.css.enable_snapshot(ch, cs_client);
        if (ret)
                goto failed;

        if (perfmon_start)
                *perfmon_start = cs_client->perfmon_start;

        nvgpu_mutex_release(&gr->cs_lock);

        return 0;

failed:
        if (gr->cs_data) {
                if (cs_client) {
                        css_gr_free_client_data(g, gr->cs_data, cs_client);
                        cs_client = NULL;
                }

                if (nvgpu_list_empty(&gr->cs_data->clients))
                        css_gr_free_shared_data(gr);
        }
        nvgpu_mutex_release(&gr->cs_lock);

        if (perfmon_start)
                *perfmon_start = 0;

        return ret;
}

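/* detach a cycle stats client from a channel and free its resources; the
 * shared data is released once the last client is gone */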
int gr_gk20a_css_detach(struct channel_gk20a *ch,
                        struct gk20a_cs_snapshot_client *cs_client)
{
        int ret = 0;
        struct gk20a *g = ch->g;
        struct gr_gk20a *gr;

        if (!cs_client)
                return -EINVAL;

        gr = &g->gr;
        nvgpu_mutex_acquire(&gr->cs_lock);
        if (gr->cs_data) {
                struct gk20a_cs_snapshot *data = gr->cs_data;

                if (g->ops.css.detach_snapshot)
                        g->ops.css.detach_snapshot(ch, cs_client);

                ret = css_gr_free_client_data(g, data, cs_client);
                if (nvgpu_list_empty(&data->clients))
                        css_gr_free_shared_data(gr);
        } else {
                ret = -EBADF;
        }
        nvgpu_mutex_release(&gr->cs_lock);

        return ret;
}

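/* flush pending hardware snapshot entries into the per-client fifos */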
int gr_gk20a_css_flush(struct channel_gk20a *ch,
                        struct gk20a_cs_snapshot_client *cs_client)
{
        int ret = 0;
        struct gk20a *g = ch->g;
        struct gr_gk20a *gr;

        if (!cs_client)
                return -EINVAL;

        gr = &g->gr;
        nvgpu_mutex_acquire(&gr->cs_lock);
        ret = css_gr_flush_snapshots(ch);
        nvgpu_mutex_release(&gr->cs_lock);

        return ret;
}

/* helper function with locking to clean up snapshot code in gr_gk20a.c */
void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
{
        struct gr_gk20a *gr = &g->gr;

        nvgpu_mutex_acquire(&gr->cs_lock);
        css_gr_free_shared_data(gr);
        nvgpu_mutex_release(&gr->cs_lock);
        nvgpu_mutex_destroy(&gr->cs_lock);
}

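/* report how many snapshot entries are pending in the hw fifo and whether it overflowed */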
int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
                                bool *hw_overflow)
{
        struct gk20a *g = ch->g;
        struct gr_gk20a *gr = &g->gr;
        struct gk20a_cs_snapshot *css = gr->cs_data;

        if (!css->hw_snapshot)
                return -EINVAL;

        *pending = css_hw_get_pending_snapshots(g);
        if (!*pending)
                return 0;

        *hw_overflow = css_hw_get_overflow_status(g);
        return 0;
}