diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 676 |
1 files changed, 676 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c new file mode 100644 index 00000000..e3896981 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | |||
@@ -0,0 +1,676 @@ | |||
1 | /* | ||
2 | * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/dma-mapping.h> | ||
26 | #include <linux/dma-buf.h> | ||
27 | |||
28 | #include <nvgpu/bitops.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/lock.h> | ||
31 | #include <nvgpu/dma.h> | ||
32 | #include <nvgpu/mm.h> | ||
33 | |||
34 | #include "gk20a.h" | ||
35 | #include "css_gr_gk20a.h" | ||
36 | |||
37 | #include <nvgpu/log.h> | ||
38 | #include <nvgpu/bug.h> | ||
39 | |||
40 | #include <nvgpu/hw/gk20a/hw_perf_gk20a.h> | ||
41 | #include <nvgpu/hw/gk20a/hw_mc_gk20a.h> | ||
42 | |||
/* check client for pointed perfmon ownership:
 * true when pm lies in [perfmon_start, perfmon_start + perfmon_count) */
#define CONTAINS_PERFMON(cl, pm) \
	((cl)->perfmon_start <= (pm) && \
	((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)

/* the minimal size of client buffer: fifo header + room for 256 entries */
#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
	(sizeof(struct gk20a_cs_snapshot_fifo) + \
	sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)

/* address of fifo entry by byte offset from the fifo header */
#define CSS_FIFO_ENTRY(fifo, offs) \
	((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))

/* calculate area capacity in number of fifo entries
 * (bytes remaining after the header, divided by entry size) */
#define CSS_FIFO_ENTRY_CAPACITY(s) \
	(((s) - sizeof(struct gk20a_cs_snapshot_fifo)) \
		/ sizeof(struct gk20a_cs_snapshot_fifo_entry))

/* reserved to indicate failures with data;
 * ids below this value are never handed out by the allocator, and a
 * return value of 0 from css_gr_allocate_perfmon_ids() means "no ids" */
#define CSS_FIRST_PERFMON_ID	32
/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
#define CSS_MAX_PERFMON_IDS	256
66 | |||
67 | /* reports whether the hw queue overflowed */ | ||
68 | static inline bool css_hw_get_overflow_status(struct gk20a *g) | ||
69 | { | ||
70 | const u32 st = perf_pmasys_control_membuf_status_overflowed_f(); | ||
71 | return st == (gk20a_readl(g, perf_pmasys_control_r()) & st); | ||
72 | } | ||
73 | |||
74 | /* returns how many pending snapshot entries are pending */ | ||
75 | static inline u32 css_hw_get_pending_snapshots(struct gk20a *g) | ||
76 | { | ||
77 | return gk20a_readl(g, perf_pmasys_mem_bytes_r()) / | ||
78 | sizeof(struct gk20a_cs_snapshot_fifo_entry); | ||
79 | } | ||
80 | |||
81 | /* informs hw how many snapshots have been processed (frees up fifo space) */ | ||
82 | inline void css_hw_set_handled_snapshots(struct gk20a *g, u32 done) | ||
83 | { | ||
84 | if (done > 0) { | ||
85 | gk20a_writel(g, perf_pmasys_mem_bump_r(), | ||
86 | done * sizeof(struct gk20a_cs_snapshot_fifo_entry)); | ||
87 | } | ||
88 | } | ||
89 | |||
/* disable streaming to memory
 *
 * Resets the perfmon unit and clears the membuf status, then marks every
 * entry still queued in the hw fifo as handled.  The register-write order
 * below is deliberate; do not reorder.
 */
static void css_hw_reset_streaming(struct gk20a *g)
{
	u32 engine_status;

	/* reset the perfmon */
	g->ops.mc.reset(g, mc_enable_perfmon_enabled_f());

	/* RBUFEMPTY must be set -- otherwise we'll pick up */
	/* snapshot that have been queued up from earlier */
	engine_status = gk20a_readl(g, perf_pmasys_enginestatus_r());
	WARN_ON(0 == (engine_status
			& perf_pmasys_enginestatus_rbufempty_empty_f()));

	/* turn off writes */
	gk20a_writel(g, perf_pmasys_control_r(),
			perf_pmasys_control_membuf_clear_status_doit_f());

	/* pointing all pending snapshots as handled */
	css_hw_set_handled_snapshots(g, css_hw_get_pending_snapshots(g));
}
111 | |||
112 | /* | ||
113 | * WARNING: all css_gr_XXX functions are local and expected to be called | ||
114 | * from locked context (protected by cs_lock) | ||
115 | */ | ||
116 | |||
117 | static int css_gr_create_shared_data(struct gr_gk20a *gr) | ||
118 | { | ||
119 | struct gk20a_cs_snapshot *data; | ||
120 | |||
121 | if (gr->cs_data) | ||
122 | return 0; | ||
123 | |||
124 | data = nvgpu_kzalloc(gr->g, sizeof(*data)); | ||
125 | if (!data) | ||
126 | return -ENOMEM; | ||
127 | |||
128 | nvgpu_init_list_node(&data->clients); | ||
129 | gr->cs_data = data; | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
/*
 * Allocates the shared hw snapshot buffer and programs the PMA streaming
 * registers to write into it.  The buffer is shared by all clients: the
 * first successful call sets it up, later calls are no-ops.
 *
 * Returns 0 on success (including the already-enabled case) or a negative
 * errno; on failure all partially-initialized state is torn down.
 */
int css_hw_enable_snapshot(struct channel_gk20a *ch,
		struct gk20a_cs_snapshot_client *cs_client)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;
	struct gk20a_cs_snapshot *data = gr->cs_data;
	u32 snapshot_size = cs_client->snapshot_size;
	int ret;

	u32 virt_addr_lo;
	u32 virt_addr_hi;
	u32 inst_pa_page;

	/* hw buffer already set up by a previous client */
	if (data->hw_snapshot)
		return 0;

	if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
		snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;

	ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size,
			&data->hw_memdesc);
	if (ret)
		return ret;

	/* perf output buffer may not cross a 4GB boundary - with a separate */
	/* va smaller than that, it won't but check anyway */
	if (!data->hw_memdesc.cpu_va ||
			data->hw_memdesc.size < snapshot_size ||
			data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) {
		ret = -EFAULT;
		goto failed_allocation;
	}

	data->hw_snapshot =
		(struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va;
	data->hw_end = data->hw_snapshot +
		snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
	data->hw_get = data->hw_snapshot;
	/* 0xff fill marks entries not yet written by hw - the flush loop
	 * treats zero0 == 0 as "entry completed" */
	memset(data->hw_snapshot, 0xff, snapshot_size);

	/* address and size are aligned to 32 bytes, the lowest bits read back
	 * as zeros */
	virt_addr_lo = u64_lo32(data->hw_memdesc.gpu_va);
	virt_addr_hi = u64_hi32(data->hw_memdesc.gpu_va);

	css_hw_reset_streaming(g);

	gk20a_writel(g, perf_pmasys_outbase_r(), virt_addr_lo);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(virt_addr_hi));
	gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);

	/* this field is aligned to 4K */
	inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;

	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
	 * should be written last */
	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(inst_pa_page) |
			perf_pmasys_mem_block_valid_true_f() |
			perf_pmasys_mem_block_target_lfb_f());

	gk20a_dbg_info("cyclestats: buffer for hardware snapshots enabled\n");

	return 0;

failed_allocation:
	/* .size != 0 means the DMA allocation above succeeded and must be
	 * released before bailing out */
	if (data->hw_memdesc.size) {
		nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
		memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
	}
	data->hw_snapshot = NULL;

	return ret;
}
209 | |||
/*
 * Disables hw snapshot streaming and releases the shared hw buffer.
 * Safe to call when streaming was never enabled (hw_snapshot == NULL).
 * Mirrors css_hw_enable_snapshot(): clears the same registers it set.
 */
void css_hw_disable_snapshot(struct gr_gk20a *gr)
{
	struct gk20a *g = gr->g;
	struct gk20a_cs_snapshot *data = gr->cs_data;

	if (!data->hw_snapshot)
		return;

	css_hw_reset_streaming(g);

	/* unprogram the output buffer registers */
	gk20a_writel(g, perf_pmasys_outbase_r(), 0);
	gk20a_writel(g, perf_pmasys_outbaseupper_r(),
			perf_pmasys_outbaseupper_ptr_f(0));
	gk20a_writel(g, perf_pmasys_outsize_r(), 0);

	/* invalidate the block bind (valid_false) */
	gk20a_writel(g, perf_pmasys_mem_block_r(),
			perf_pmasys_mem_block_base_f(0) |
			perf_pmasys_mem_block_valid_false_f() |
			perf_pmasys_mem_block_target_f(0));

	nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
	memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
	data->hw_snapshot = NULL;

	gk20a_dbg_info("cyclestats: buffer for hardware snapshots disabled\n");
}
236 | |||
237 | static void css_gr_free_shared_data(struct gr_gk20a *gr) | ||
238 | { | ||
239 | struct gk20a *g = gr->g; | ||
240 | |||
241 | if (gr->cs_data) { | ||
242 | /* the clients list is expected to be empty */ | ||
243 | g->ops.css.disable_snapshot(gr); | ||
244 | |||
245 | /* release the objects */ | ||
246 | nvgpu_kfree(gr->g, gr->cs_data); | ||
247 | gr->cs_data = NULL; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | |||
252 | static struct gk20a_cs_snapshot_client* | ||
253 | css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon) | ||
254 | { | ||
255 | struct gk20a_cs_snapshot_client *client; | ||
256 | |||
257 | nvgpu_list_for_each_entry(client, clients, | ||
258 | gk20a_cs_snapshot_client, list) { | ||
259 | if (CONTAINS_PERFMON(client, perfmon)) | ||
260 | return client; | ||
261 | } | ||
262 | |||
263 | return NULL; | ||
264 | } | ||
265 | |||
/*
 * Drains completed entries from the shared hw fifo into the per-client
 * userspace-visible fifos.
 *
 * The hw fifo is a ring of gk20a_cs_snapshot_fifo_entry; an entry with
 * zero0 == 0 has been completed by hw (unwritten slots stay 0xff-filled).
 * Each entry is routed to the client owning its perfmon_id.  Because the
 * client fifo header is shared with userspace, only the put pointer and
 * the overflow counters are updated here.
 *
 * Must be called with cs_lock held.  Returns 0 on success or a negative
 * errno when no shared data / no clients exist.
 */
static int css_gr_flush_snapshots(struct channel_gk20a *ch)
{
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr = &g->gr;
	struct gk20a_cs_snapshot *css = gr->cs_data;
	struct gk20a_cs_snapshot_client *cur;
	u32 pending, completed;
	bool hw_overflow;
	int err;

	/* variables for iterating over HW entries */
	u32 sid;
	struct gk20a_cs_snapshot_fifo_entry *src;

	/* due to data sharing with userspace we allowed update only */
	/* overflows and put field in the fifo header */
	struct gk20a_cs_snapshot_fifo *dst;
	struct gk20a_cs_snapshot_fifo_entry *dst_get;
	struct gk20a_cs_snapshot_fifo_entry *dst_put;
	struct gk20a_cs_snapshot_fifo_entry *dst_nxt;
	struct gk20a_cs_snapshot_fifo_entry *dst_head;
	struct gk20a_cs_snapshot_fifo_entry *dst_tail;

	if (!css)
		return -EINVAL;

	if (nvgpu_list_empty(&css->clients))
		return -EBADF;

	/* check data available */
	err = g->ops.css.check_data_available(ch, &pending, &hw_overflow);
	if (err)
		return err;

	if (!pending)
		return 0;

	/* hw overflow is reported to every attached client */
	if (hw_overflow) {
		nvgpu_list_for_each_entry(cur, &css->clients,
				gk20a_cs_snapshot_client, list) {
			cur->snapshot->hw_overflow_events_occured++;
		}

		nvgpu_warn(g, "cyclestats: hardware overflow detected");
	}

	/* process all items in HW buffer */
	sid = 0;
	completed = 0;
	cur = NULL;
	dst = NULL;
	dst_put = NULL;
	src = css->hw_get;

	/* proceed all completed records */
	while (sid < pending && 0 == src->zero0) {
		/* we may have a new perfmon_id which required to */
		/* switch to a new client -> let's forget current */
		if (cur && !CONTAINS_PERFMON(cur, src->perfmon_id)) {
			/* publish the put offset of the client we are leaving */
			dst->put = (char *)dst_put - (char *)dst;
			dst = NULL;
			cur = NULL;
		}

		/* now we have to select a new current client */
		/* the client selection rate depends from experiment */
		/* activity but on Android usually happened 1-2 times */
		if (!cur) {
			cur = css_gr_search_client(&css->clients,
							src->perfmon_id);
			if (cur) {
				/* found - setup all required data */
				dst = cur->snapshot;
				dst_get = CSS_FIFO_ENTRY(dst, dst->get);
				dst_put = CSS_FIFO_ENTRY(dst, dst->put);
				dst_head = CSS_FIFO_ENTRY(dst, dst->start);
				dst_tail = CSS_FIFO_ENTRY(dst, dst->end);

				dst_nxt = dst_put + 1;
				if (dst_nxt == dst_tail)
					dst_nxt = dst_head;
			} else {
				/* client not found - skipping this entry */
				nvgpu_warn(g, "cyclestats: orphaned perfmon %u",
					   src->perfmon_id);
				goto next_hw_fifo_entry;
			}
		}

		/* check for software overflows: client fifo full when the
		 * next put position would collide with the get position */
		if (dst_nxt == dst_get) {
			/* no data copy, no pointer updates */
			dst->sw_overflow_events_occured++;
			nvgpu_warn(g, "cyclestats: perfmon %u soft overflow",
				   src->perfmon_id);
		} else {
			*dst_put = *src;
			completed++;

			dst_put = dst_nxt++;

			if (dst_nxt == dst_tail)
				dst_nxt = dst_head;
		}

next_hw_fifo_entry:
		sid++;
		/* advance in the hw ring, wrapping at the end */
		if (++src >= css->hw_end)
			src = css->hw_snapshot;
	}

	/* update client put pointer if necessary */
	if (cur && dst)
		dst->put = (char *)dst_put - (char *)dst;

	/* re-set HW buffer after processing taking wrapping into account:
	 * refill consumed slots with 0xff so they read as "not completed" */
	if (css->hw_get < src) {
		memset(css->hw_get, 0xff, (src - css->hw_get) * sizeof(*src));
	} else {
		memset(css->hw_snapshot, 0xff,
			(src - css->hw_snapshot) * sizeof(*src));
		memset(css->hw_get, 0xff,
			(css->hw_end - css->hw_get) * sizeof(*src));
	}
	gr->cs_data->hw_get = src;

	/* tell hw how much fifo space was freed (NULL in virtual case) */
	if (g->ops.css.set_handled_snapshots)
		g->ops.css.set_handled_snapshots(g, sid);

	if (completed != sid) {
		/* not all entries proceed correctly. some of problems */
		/* reported as overflows, some as orphaned perfmons, */
		/* but it will be better notify with summary about it */
		nvgpu_warn(g, "cyclestats: completed %u from %u entries",
			   completed, pending);
	}

	return 0;
}
405 | |||
406 | u32 css_gr_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, | ||
407 | u32 count) | ||
408 | { | ||
409 | unsigned long *pids = data->perfmon_ids; | ||
410 | unsigned int f; | ||
411 | |||
412 | f = bitmap_find_next_zero_area(pids, CSS_MAX_PERFMON_IDS, | ||
413 | CSS_FIRST_PERFMON_ID, count, 0); | ||
414 | if (f > CSS_MAX_PERFMON_IDS) | ||
415 | f = 0; | ||
416 | else | ||
417 | bitmap_set(pids, f, count); | ||
418 | |||
419 | return f; | ||
420 | } | ||
421 | |||
422 | u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, | ||
423 | u32 start, | ||
424 | u32 count) | ||
425 | { | ||
426 | unsigned long *pids = data->perfmon_ids; | ||
427 | u32 end = start + count; | ||
428 | u32 cnt = 0; | ||
429 | |||
430 | if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) { | ||
431 | bitmap_clear(pids, start, count); | ||
432 | cnt = count; | ||
433 | } | ||
434 | |||
435 | return cnt; | ||
436 | } | ||
437 | |||
438 | |||
439 | static int css_gr_free_client_data(struct gk20a *g, | ||
440 | struct gk20a_cs_snapshot *data, | ||
441 | struct gk20a_cs_snapshot_client *client) | ||
442 | { | ||
443 | int ret = 0; | ||
444 | |||
445 | if (client->list.next && client->list.prev) | ||
446 | nvgpu_list_del(&client->list); | ||
447 | |||
448 | if (client->perfmon_start && client->perfmon_count | ||
449 | && g->ops.css.release_perfmon_ids) { | ||
450 | if (client->perfmon_count != g->ops.css.release_perfmon_ids(data, | ||
451 | client->perfmon_start, client->perfmon_count)) | ||
452 | ret = -EINVAL; | ||
453 | } | ||
454 | |||
455 | if (client->dma_handler) { | ||
456 | if (client->snapshot) | ||
457 | dma_buf_vunmap(client->dma_handler, client->snapshot); | ||
458 | dma_buf_put(client->dma_handler); | ||
459 | } | ||
460 | |||
461 | nvgpu_kfree(g, client); | ||
462 | |||
463 | return ret; | ||
464 | } | ||
465 | |||
/*
 * Creates and registers a snapshot client backed by a user dma-buf.
 *
 * Maps the dma-buf, initializes the fifo header inside it, allocates
 * perfmon ids (unless running virtualized, where the server owns them)
 * and links the client into data->clients.  On failure *client is NULL
 * and any partially-built state is released via css_gr_free_client_data().
 */
static int css_gr_create_client_data(struct gk20a *g,
			struct gk20a_cs_snapshot *data,
			u32 dmabuf_fd, u32 perfmon_count,
			struct gk20a_cs_snapshot_client **client)
{
	struct gk20a_cs_snapshot_client *cur;
	int ret = 0;

	cur = nvgpu_kzalloc(g, sizeof(*cur));
	if (!cur) {
		ret = -ENOMEM;
		goto failed;
	}

	cur->dmabuf_fd = dmabuf_fd;
	cur->dma_handler = dma_buf_get(cur->dmabuf_fd);
	if (IS_ERR(cur->dma_handler)) {
		ret = PTR_ERR(cur->dma_handler);
		/* NULL it so the cleanup path doesn't dma_buf_put(ERR_PTR) */
		cur->dma_handler = NULL;
		goto failed;
	}

	cur->snapshot = (struct gk20a_cs_snapshot_fifo *)
				dma_buf_vmap(cur->dma_handler);
	if (!cur->snapshot) {
		ret = -ENOMEM;
		goto failed;
	}

	/* buffer must fit the fifo header plus a minimal entry area */
	cur->snapshot_size = cur->dma_handler->size;
	if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
		ret = -ENOMEM;
		goto failed;
	}

	memset(cur->snapshot, 0, sizeof(*cur->snapshot));
	cur->snapshot->start = sizeof(*cur->snapshot);
	/* we should be ensure that can fit all fifo entries here:
	 * end is rounded down to a whole number of entries */
	cur->snapshot->end =
		CSS_FIFO_ENTRY_CAPACITY(cur->snapshot_size)
			* sizeof(struct gk20a_cs_snapshot_fifo_entry)
			+ sizeof(struct gk20a_cs_snapshot_fifo);
	cur->snapshot->get = cur->snapshot->start;
	cur->snapshot->put = cur->snapshot->start;

	cur->perfmon_count = perfmon_count;

	/* In virtual case, perfmon ID allocation is handled by the server
	 * at the time of the attach (allocate_perfmon_ids is NULL in this case)
	 */
	if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) {
		cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data,
							cur->perfmon_count);
		if (!cur->perfmon_start) {
			ret = -ENOENT;
			goto failed;
		}
	}

	nvgpu_list_add_tail(&cur->list, &data->clients);
	*client = cur;

	return 0;

failed:
	*client = NULL;
	if (cur)
		css_gr_free_client_data(g, data, cur);

	return ret;
}
537 | |||
538 | |||
/*
 * Attaches a new cycle-stats snapshot client to the channel.
 *
 * dmabuf_fd      - fd of the user buffer receiving snapshot entries
 * perfmon_count  - number of perfmon ids to reserve (must be in range)
 * perfmon_start  - optional out: first reserved perfmon id (0 on failure)
 * cs_client      - out: the created client handle (NULL on failure)
 *
 * Creates the shared state on first attach; on failure every layer built
 * so far is unwound under cs_lock.  Returns 0 or a negative errno.
 */
int gr_gk20a_css_attach(struct channel_gk20a *ch,
			u32 dmabuf_fd,
			u32 perfmon_count,
			u32 *perfmon_start,
			struct gk20a_cs_snapshot_client **cs_client)
{
	int ret = 0;
	struct gk20a *g = ch->g;
	struct gr_gk20a *gr;

	/* we must have a placeholder to store pointer to client structure */
	if (!cs_client)
		return -EINVAL;

	/* only ids in [CSS_FIRST_PERFMON_ID, CSS_MAX_PERFMON_IDS) are
	 * allocatable, so the request can never exceed their count */
	if (!perfmon_count ||
	    perfmon_count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID)
		return -EINVAL;

	gr = &g->gr;
	*cs_client = NULL;

	nvgpu_mutex_acquire(&gr->cs_lock);

	ret = css_gr_create_shared_data(gr);
	if (ret)
		goto failed;

	ret = css_gr_create_client_data(g, gr->cs_data,
				     dmabuf_fd,
				     perfmon_count,
				     cs_client);
	if (ret)
		goto failed;

	ret = g->ops.css.enable_snapshot(ch, *cs_client);
	if (ret)
		goto failed;

	if (perfmon_start)
		*perfmon_start = (*cs_client)->perfmon_start;

	nvgpu_mutex_release(&gr->cs_lock);

	return 0;

failed:
	/* unwind: free the client if it was created, then the shared data
	 * if this would-be client was the only one */
	if (gr->cs_data) {
		if (*cs_client) {
			css_gr_free_client_data(g, gr->cs_data, *cs_client);
			*cs_client = NULL;
		}

		if (nvgpu_list_empty(&gr->cs_data->clients))
			css_gr_free_shared_data(gr);
	}
	nvgpu_mutex_release(&gr->cs_lock);

	if (perfmon_start)
		*perfmon_start = 0;

	return ret;
}
601 | |||
602 | int gr_gk20a_css_detach(struct channel_gk20a *ch, | ||
603 | struct gk20a_cs_snapshot_client *cs_client) | ||
604 | { | ||
605 | int ret = 0; | ||
606 | struct gk20a *g = ch->g; | ||
607 | struct gr_gk20a *gr; | ||
608 | |||
609 | if (!cs_client) | ||
610 | return -EINVAL; | ||
611 | |||
612 | gr = &g->gr; | ||
613 | nvgpu_mutex_acquire(&gr->cs_lock); | ||
614 | if (gr->cs_data) { | ||
615 | struct gk20a_cs_snapshot *data = gr->cs_data; | ||
616 | |||
617 | if (g->ops.css.detach_snapshot) | ||
618 | g->ops.css.detach_snapshot(ch, cs_client); | ||
619 | |||
620 | ret = css_gr_free_client_data(g, data, cs_client); | ||
621 | if (nvgpu_list_empty(&data->clients)) | ||
622 | css_gr_free_shared_data(gr); | ||
623 | } else { | ||
624 | ret = -EBADF; | ||
625 | } | ||
626 | nvgpu_mutex_release(&gr->cs_lock); | ||
627 | |||
628 | return ret; | ||
629 | } | ||
630 | |||
631 | int gr_gk20a_css_flush(struct channel_gk20a *ch, | ||
632 | struct gk20a_cs_snapshot_client *cs_client) | ||
633 | { | ||
634 | int ret = 0; | ||
635 | struct gk20a *g = ch->g; | ||
636 | struct gr_gk20a *gr; | ||
637 | |||
638 | if (!cs_client) | ||
639 | return -EINVAL; | ||
640 | |||
641 | gr = &g->gr; | ||
642 | nvgpu_mutex_acquire(&gr->cs_lock); | ||
643 | ret = css_gr_flush_snapshots(ch); | ||
644 | nvgpu_mutex_release(&gr->cs_lock); | ||
645 | |||
646 | return ret; | ||
647 | } | ||
648 | |||
649 | /* helper function with locking to cleanup snapshot code code in gr_gk20a.c */ | ||
650 | void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) | ||
651 | { | ||
652 | struct gr_gk20a *gr = &g->gr; | ||
653 | |||
654 | nvgpu_mutex_acquire(&gr->cs_lock); | ||
655 | css_gr_free_shared_data(gr); | ||
656 | nvgpu_mutex_release(&gr->cs_lock); | ||
657 | nvgpu_mutex_destroy(&gr->cs_lock); | ||
658 | } | ||
659 | |||
660 | int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, | ||
661 | bool *hw_overflow) | ||
662 | { | ||
663 | struct gk20a *g = ch->g; | ||
664 | struct gr_gk20a *gr = &g->gr; | ||
665 | struct gk20a_cs_snapshot *css = gr->cs_data; | ||
666 | |||
667 | if (!css->hw_snapshot) | ||
668 | return -EINVAL; | ||
669 | |||
670 | *pending = css_hw_get_pending_snapshots(g); | ||
671 | if (!*pending) | ||
672 | return 0; | ||
673 | |||
674 | *hw_overflow = css_hw_get_overflow_status(g); | ||
675 | return 0; | ||
676 | } | ||