Diffstat (limited to 'include/gk20a/fecs_trace_gk20a.c')
-rw-r--r--  include/gk20a/fecs_trace_gk20a.c  744
1 file changed, 0 insertions, 744 deletions
diff --git a/include/gk20a/fecs_trace_gk20a.c b/include/gk20a/fecs_trace_gk20a.c
deleted file mode 100644
index 5c1c5e0..0000000
--- a/include/gk20a/fecs_trace_gk20a.c
+++ /dev/null
@@ -1,744 +0,0 @@
/*
 * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/thread.h>
#include <nvgpu/barrier.h>
#include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/timers.h>
#include <nvgpu/channel.h>

#include "fecs_trace_gk20a.h"
#include "gk20a.h"
#include "gr_gk20a.h"

#include <nvgpu/log.h>
#include <nvgpu/fecs_trace.h>

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

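/*
 * Pid hash table entry: maps a FECS context pointer (instance block
 * address >> 12) to the pid of the process owning the corresponding
 * channel/TSG, so consumed trace records can be attributed to user
 * processes.
 */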
struct gk20a_fecs_trace_hash_ent {
	u32 context_ptr;
	pid_t pid;
	struct hlist_node node;
};

struct gk20a_fecs_trace {

	DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
	struct nvgpu_mutex hash_lock;
	struct nvgpu_mutex poll_lock;
	struct nvgpu_thread poll_task;
	bool init;
	struct nvgpu_mutex enable_lock;
	u32 enable_count;
};

#ifdef CONFIG_GK20A_CTXSW_TRACE
u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
}

u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
}

u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
{
	return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
}

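/*
 * FECS identifies a channel context by the 4 KB-aligned address of its
 * instance block, i.e. the instance block address shifted right by 12 bits.
 */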
static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
{
	return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
}

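/*
 * Number of u64 timestamp slots in each hardware record: the record size
 * minus the fixed header (struct gk20a_fecs_trace_record), divided by
 * sizeof(u64).
 */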
int gk20a_fecs_trace_num_ts(void)
{
	return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
		- sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
}

struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
	struct gk20a *g, int idx)
{
	struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;

	return (struct gk20a_fecs_trace_record *)
		((u8 *) mem->cpu_va
		+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
}

bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
{
	/*
	 * testing magic_hi should suffice. magic_lo is sometimes used
	 * as a sequence number in experimental ucode.
	 */
	return (r->magic_hi
		== ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
}

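/*
 * FECS exposes the trace ring read pointer through gr_fecs_mailbox1_r() and
 * the write pointer through gr_fecs_mailbox0_r(); accesses are wrapped in
 * gr_gk20a_elpg_protected_call() so they are safe w.r.t. ELPG power-gating.
 */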
int gk20a_fecs_trace_get_read_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
		gk20a_readl(g, gr_fecs_mailbox1_r()));
}

int gk20a_fecs_trace_get_write_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
		gk20a_readl(g, gr_fecs_mailbox0_r()));
}

static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
{
	nvgpu_log(g, gpu_dbg_ctxsw, "set read=%d", index);
	return gr_gk20a_elpg_protected_call(g,
		(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
}

void gk20a_fecs_trace_hash_dump(struct gk20a *g)
{
	u32 bkt;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_ctxsw, "dumping hash table");

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each(trace->pid_hash_table, bkt, ent, node)
	{
		nvgpu_log(g, gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
			ent, bkt, ent->context_ptr, ent->pid);

	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
{
	struct gk20a_fecs_trace_hash_ent *he;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);

	he = nvgpu_kzalloc(g, sizeof(*he));
	if (unlikely(!he)) {
		nvgpu_warn(g,
			"can't alloc new hash entry for context_ptr=%x pid=%d",
			context_ptr, pid);
		return -ENOMEM;
	}

	he->context_ptr = context_ptr;
	he->pid = pid;
	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_add(trace->pid_hash_table, &he->node, context_ptr);
	nvgpu_mutex_release(&trace->hash_lock);
	return 0;
}

static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
{
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"freeing hash entry context_ptr=%x", context_ptr);

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
		context_ptr) {
		if (ent->context_ptr == context_ptr) {
			hash_del(&ent->node);
			nvgpu_log(g, gpu_dbg_ctxsw,
				"freed hash entry=%p context_ptr=%x", ent,
				ent->context_ptr);
			nvgpu_kfree(g, ent);
			break;
		}
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
{
	u32 bkt;
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
		hash_del(&ent->node);
		nvgpu_kfree(g, ent);
	}
	nvgpu_mutex_release(&trace->hash_lock);

}

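/*
 * Look up the pid previously registered for a FECS context pointer.
 * Returns 0 if the context pointer is unknown, e.g. the channel was never
 * bound for tracing.
 */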
static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
{
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t pid = 0;

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
		if (ent->context_ptr == context_ptr) {
			nvgpu_log(g, gpu_dbg_ctxsw,
				"found context_ptr=%x -> pid=%d",
				ent->context_ptr, ent->pid);
			pid = ent->pid;
			break;
		}
	}
	nvgpu_mutex_release(&trace->hash_lock);

	return pid;
}

/*
 * Converts HW entry format to userspace-facing format and pushes it to the
 * queue.
 */
static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
{
	int i;
	struct nvgpu_gpu_ctxsw_trace_entry entry = { };
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t cur_pid;
	pid_t new_pid;
	int count = 0;

	/* for now, only one VM */
	const int vmid = 0;

	struct gk20a_fecs_trace_record *r =
		gk20a_fecs_trace_get_record(g, index);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"consuming record trace=%p read=%d record=%p", trace, index, r);

	if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
		nvgpu_warn(g,
			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
			trace, index, r, r->magic_lo, r->magic_hi);
		return -EINVAL;
	}

	/* Clear magic_hi to detect cases where the CPU could read the write
	 * index before the FECS record is actually written to DRAM. This
	 * should not happen, as we force FECS writes to SYSMEM by reading
	 * through PRAMIN.
	 */
	r->magic_hi = 0;

	cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
	new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
		r->context_ptr, cur_pid, r->new_context_ptr, new_pid);

	entry.context_id = r->context_id;
	entry.vmid = vmid;

	/* break out FECS record into trace events */
	for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {

		entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
		entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;

		nvgpu_log(g, gpu_dbg_ctxsw,
			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
			entry.tag, entry.timestamp, r->context_id,
			r->new_context_id);

		switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
		case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
		case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
			entry.context_id = r->new_context_id;
			entry.pid = new_pid;
			break;

		case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
		case NVGPU_GPU_CTXSW_TAG_SAVE_END:
			entry.context_id = r->context_id;
			entry.pid = cur_pid;
			break;

		default:
			/* tags are not guaranteed to start at the beginning */
			WARN_ON(entry.tag && (entry.tag != NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP));
			continue;
		}

		nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
			entry.tag, entry.context_id, entry.pid);

		if (!entry.context_id)
			continue;

		gk20a_ctxsw_trace_write(g, &entry);
		count++;
	}

	gk20a_ctxsw_trace_wake_up(g, vmid);
	return count;
}

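/*
 * Drain the FECS trace ring: snapshot the hardware write pointer, convert
 * every record between the current read and write pointers into ctxsw trace
 * entries, then publish the updated read pointer back to FECS.
 */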
int gk20a_fecs_trace_poll(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	int read = 0;
	int write = 0;
	int cnt;
	int err;

	err = gk20a_busy(g);
	if (unlikely(err))
		return err;

	nvgpu_mutex_acquire(&trace->poll_lock);
	write = gk20a_fecs_trace_get_write_index(g);
	if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
		nvgpu_err(g,
			"failed to acquire write index, write=%d", write);
		err = write;
		goto done;
	}

	read = gk20a_fecs_trace_get_read_index(g);

	cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
	if (!cnt)
		goto done;

	nvgpu_log(g, gpu_dbg_ctxsw,
		"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
		read, gk20a_fecs_trace_get_read_index(g), write, cnt);

	/* Ensure all FECS writes have made it to SYSMEM */
	g->ops.mm.fb_flush(g);

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
		/* Bits 30:0 of MAILBOX1 represent the actual read pointer value */
		read = read & (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
	}

	while (read != write) {
		cnt = gk20a_fecs_trace_ring_read(g, read);
		if (cnt > 0) {
			nvgpu_log(g, gpu_dbg_ctxsw,
				"number of trace entries added: %d", cnt);
		}

		/* Get to next record. */
		read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
	}

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
		/*
		 * In the next step, read pointer is going to be updated.
		 * So, MSB of read pointer should be set back to 1. This will
		 * keep FECS trace enabled.
		 */
		read = read | (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
	}

	/* ensure FECS records have been updated before incrementing read index */
	nvgpu_wmb();
	gk20a_fecs_trace_set_read_index(g, read);

done:
	nvgpu_mutex_release(&trace->poll_lock);
	gk20a_idle(g);
	return err;
}

static int gk20a_fecs_trace_periodic_polling(void *arg)
{
	struct gk20a *g = (struct gk20a *)arg;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	pr_info("%s: running\n", __func__);

	while (!nvgpu_thread_should_stop(&trace->poll_task)) {

		nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
				GK20A_FECS_TRACE_FRAME_PERIOD_US * 2);

		gk20a_fecs_trace_poll(g);
	}

	return 0;
}

size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
{
	return GK20A_FECS_TRACE_NUM_RECORDS
		* ctxsw_prog_record_timestamp_record_size_in_bytes_v();
}

int gk20a_fecs_trace_init(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace;
	int err;

	trace = nvgpu_kzalloc(g, sizeof(struct gk20a_fecs_trace));
	if (!trace) {
		nvgpu_warn(g, "failed to allocate fecs_trace");
		return -ENOMEM;
	}
	g->fecs_trace = trace;

	err = nvgpu_mutex_init(&trace->poll_lock);
	if (err)
		goto clean;
	err = nvgpu_mutex_init(&trace->hash_lock);
	if (err)
		goto clean_poll_lock;

	err = nvgpu_mutex_init(&trace->enable_lock);
	if (err)
		goto clean_hash_lock;

	BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
	hash_init(trace->pid_hash_table);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);

	trace->enable_count = 0;
	trace->init = true;

	return 0;

clean_hash_lock:
	nvgpu_mutex_destroy(&trace->hash_lock);

clean_poll_lock:
	nvgpu_mutex_destroy(&trace->poll_lock);
clean:
	nvgpu_kfree(g, trace);
	g->fecs_trace = NULL;
	return err;
}

int gk20a_fecs_trace_bind_channel(struct gk20a *g,
		struct channel_gk20a *ch)
{
	/*
	 * map our circ_buf to the context space and store the GPU VA
	 * in the context header.
	 */

	u32 lo;
	u32 hi;
	u64 addr;
	struct tsg_gk20a *tsg;
	struct nvgpu_gr_ctx *ch_ctx;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct nvgpu_mem *mem;
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
	u32 aperture_mask;

	tsg = tsg_gk20a_from_ch(ch);
	if (tsg == NULL) {
		nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
		return -EINVAL;
	}

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
		"chid=%d context_ptr=%x inst_block=%llx",
		ch->chid, context_ptr,
		nvgpu_inst_block_addr(g, &ch->inst_block));

	tsg = tsg_gk20a_from_ch(ch);
	if (!tsg)
		return -EINVAL;

	ch_ctx = &tsg->gr_ctx;
	mem = &ch_ctx->mem;

	if (!trace)
		return -ENOMEM;

	mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
		addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
		nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
		aperture_mask = 0;
	} else {
		addr = nvgpu_inst_block_addr(g, mem);
		nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
		aperture_mask = nvgpu_aperture_mask(g, mem,
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
	}
	if (!addr)
		return -ENOMEM;

	lo = u64_lo32(addr);
	hi = u64_hi32(addr);

	mem = &ch_ctx->mem;

	nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
		lo, GK20A_FECS_TRACE_NUM_RECORDS);

	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
			GK20A_FECS_TRACE_NUM_RECORDS));

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
		mem = &ch->ctx_header;

	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
		lo);
	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
		aperture_mask);

	/* The pid (process identifier) seen in user space corresponds to the
	 * tgid (thread group id) in kernel space.
	 */
	gk20a_fecs_trace_hash_add(g, context_ptr, tsg->tgid);

	return 0;
}

int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
{
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);

	if (g->fecs_trace) {
		nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
			"ch=%p context_ptr=%x", ch, context_ptr);

		if (g->ops.fecs_trace.is_enabled(g)) {
			if (g->ops.fecs_trace.flush)
				g->ops.fecs_trace.flush(g);
			gk20a_fecs_trace_poll(g);
		}
		gk20a_fecs_trace_hash_del(g, context_ptr);
	}
	return 0;
}

int gk20a_fecs_trace_reset(struct gk20a *g)
{
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	if (!g->ops.fecs_trace.is_enabled(g))
		return 0;

	gk20a_fecs_trace_poll(g);
	return gk20a_fecs_trace_set_read_index(g, 0);
}

int gk20a_fecs_trace_deinit(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	if (!trace->init)
		return 0;

	/*
	 * Check if tracer was enabled before attempting to stop the
	 * tracer thread.
	 */
	if (trace->enable_count > 0) {
		nvgpu_thread_stop(&trace->poll_task);
	}
	gk20a_fecs_trace_free_hash_table(g);

	nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
	nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
	nvgpu_mutex_destroy(&g->fecs_trace->enable_lock);

	nvgpu_kfree(g, g->fecs_trace);
	g->fecs_trace = NULL;
	return 0;
}

int gk20a_gr_max_entries(struct gk20a *g,
		struct nvgpu_gpu_ctxsw_trace_filter *filter)
{
	int n;
	int tag;

	/* Compute number of entries per record, with given filter */
	for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
		n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);

	/* Return max number of entries generated for the whole ring */
	return n * GK20A_FECS_TRACE_NUM_RECORDS;
}

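/*
 * Enable/disable are reference counted: the polling thread is created on
 * the first enable and stopped when the enable count drops back to zero.
 */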
int gk20a_fecs_trace_enable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	int write;
	int err = 0;

	if (!trace)
		return -EINVAL;

	nvgpu_mutex_acquire(&trace->enable_lock);
	trace->enable_count++;

	if (trace->enable_count == 1U) {
		/* drop data in hw buffer */
		if (g->ops.fecs_trace.flush)
			g->ops.fecs_trace.flush(g);

		write = gk20a_fecs_trace_get_write_index(g);

		if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
			/*
			 * For enabling FECS trace support, MAILBOX1's MSB
			 * (Bit 31:31) should be set to 1. Bits 30:0 represent
			 * the actual pointer value.
			 */
			write = write |
				(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
		}
		gk20a_fecs_trace_set_read_index(g, write);

		/*
		 * FECS ucode does a priv holdoff around the assertion of
		 * context reset. So, pri transactions (e.g. mailbox1 register
		 * write) might fail due to this. Hence, do write with ack
		 * i.e. write and read it back to make sure write happened for
		 * mailbox1.
		 */
		while (gk20a_fecs_trace_get_read_index(g) != write) {
			nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
			gk20a_fecs_trace_set_read_index(g, write);
		}

		err = nvgpu_thread_create(&trace->poll_task, g,
				gk20a_fecs_trace_periodic_polling, __func__);
		if (err) {
			nvgpu_warn(g,
				"failed to create FECS polling task");
			goto done;
		}
	}

done:
	nvgpu_mutex_release(&trace->enable_lock);
	return err;
}

int gk20a_fecs_trace_disable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	int read = 0;

	if (trace == NULL) {
		return -EINVAL;
	}

	nvgpu_mutex_acquire(&trace->enable_lock);
	if (trace->enable_count <= 0U) {
		nvgpu_mutex_release(&trace->enable_lock);
		return 0;
	}
	trace->enable_count--;
	if (trace->enable_count == 0U) {
		if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
			/*
			 * For disabling FECS trace support, MAILBOX1's MSB
			 * (Bit 31:31) should be set to 0.
			 */
			read = gk20a_fecs_trace_get_read_index(g) &
				(~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));

			gk20a_fecs_trace_set_read_index(g, read);

			/*
			 * FECS ucode does a priv holdoff around the assertion
			 * of context reset. So, pri transactions (e.g.
			 * mailbox1 register write) might fail due to this.
			 * Hence, do write with ack i.e. write and read it back
			 * to make sure write happened for mailbox1.
			 */
			while (gk20a_fecs_trace_get_read_index(g) != read) {
				nvgpu_log(g, gpu_dbg_ctxsw,
					"mailbox1 update failed");
				gk20a_fecs_trace_set_read_index(g, read);
			}
		}

		nvgpu_thread_stop(&trace->poll_task);

	}
	nvgpu_mutex_release(&trace->enable_lock);

	return -EPERM;
}

bool gk20a_fecs_trace_is_enabled(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	return (trace && nvgpu_thread_is_running(&trace->poll_task));
}

void gk20a_fecs_trace_reset_buffer(struct gk20a *g)
{
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	gk20a_fecs_trace_set_read_index(g,
		gk20a_fecs_trace_get_write_index(g));
}
#endif /* CONFIG_GK20A_CTXSW_TRACE */