Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c  792
1 file changed, 792 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
new file mode 100644
index 00000000..d283a82e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -0,0 +1,792 @@
/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <asm/barrier.h>
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#endif
#include <uapi/linux/nvgpu.h>

#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/thread.h>
#include <nvgpu/barrier.h>
#include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include <nvgpu/ctxsw_trace.h>

#include "fecs_trace_gk20a.h"
#include "gk20a.h"
#include "gr_gk20a.h"
#include "common/linux/os_linux.h"

#include <nvgpu/log.h>

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

/*
 * If the HW circular buffer is getting too many "buffer full" conditions,
 * increasing this constant should help (it drives the size of Linux's
 * internal buffer).
 */
#define GK20A_FECS_TRACE_NUM_RECORDS		(1 << 6)
#define GK20A_FECS_TRACE_HASH_BITS		8 /* 2^8 */
#define GK20A_FECS_TRACE_FRAME_PERIOD_US	(1000000ULL/60ULL)
#define GK20A_FECS_TRACE_PTIMER_SHIFT		5

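/*
 * Note: the ring size must remain a power of two: gk20a_fecs_trace_poll()
 * advances the read index with a mask, not a modulo:
 *
 *   read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
 *
 * e.g. with 64 records, read=63 wraps to (63 + 1) & 63 = 0. This is
 * enforced at init time via BUG_ON(!is_power_of_2(...)).
 */
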
struct gk20a_fecs_trace_record {
	u32 magic_lo;
	u32 magic_hi;
	u32 context_id;
	u32 context_ptr;
	u32 new_context_id;
	u32 new_context_ptr;
	u64 ts[];
};

struct gk20a_fecs_trace_hash_ent {
	u32 context_ptr;
	pid_t pid;
	struct hlist_node node;
};

struct gk20a_fecs_trace {
	struct nvgpu_mem trace_buf;
	DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
	struct nvgpu_mutex hash_lock;
	struct nvgpu_mutex poll_lock;
	struct nvgpu_thread poll_task;
	bool init;
};

#ifdef CONFIG_GK20A_CTXSW_TRACE
static inline u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
}

static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
{
	return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
}
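
/*
 * Layout note for the two helpers above: each 64-bit ts[] word carries a
 * tag field in the upper bits of its high 32-bit half and the timestamp
 * in the remaining bits (field positions come from the hw_ctxsw_prog
 * headers). Writing tag_v()/tag_m() as shorthand for the
 * ctxsw_prog_record_timestamp_timestamp_hi_tag_*() accessors, the decode
 * is effectively:
 *
 *   tag       = tag_v((u32)(ts >> 32));       extract upper-half tag field
 *   timestamp = ts & ~((u64)tag_m() << 32);   mask the tag bits off
 *
 * Consumers then shift the timestamp left by GK20A_FECS_TRACE_PTIMER_SHIFT
 * (see gk20a_fecs_trace_ring_read()) to undo the scaling applied when the
 * PTIMER value was stored.
 */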

static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
{
	return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
}

static inline int gk20a_fecs_trace_num_ts(void)
{
	return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
		- sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
}

static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
	struct gk20a_fecs_trace *trace, int idx)
{
	return (struct gk20a_fecs_trace_record *)
		((u8 *) trace->trace_buf.cpu_va
		+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
}

static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
{
	/*
	 * testing magic_hi should suffice. magic_lo is sometimes used
	 * as a sequence number in experimental ucode.
	 */
	return (r->magic_hi
		== ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
}

static int gk20a_fecs_trace_get_read_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox1_r()));
}

static int gk20a_fecs_trace_get_write_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox0_r()));
}

static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
{
	gk20a_dbg(gpu_dbg_ctxsw, "set read=%d", index);
	return gr_gk20a_elpg_protected_call(g,
			(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
}
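
/*
 * Index bookkeeping for the three accessors above: FECS publishes its
 * write pointer in gr_fecs_mailbox0_r(), and software acknowledges
 * consumed records by writing its read pointer to gr_fecs_mailbox1_r().
 * Each access goes through gr_gk20a_elpg_protected_call() so that the GR
 * engine is kept powered while the mailbox registers are touched.
 */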

void gk20a_fecs_trace_hash_dump(struct gk20a *g)
{
	u32 bkt;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table");

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each(trace->pid_hash_table, bkt, ent, node) {
		gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
			ent, bkt, ent->context_ptr, ent->pid);
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
{
	struct gk20a_fecs_trace_hash_ent *he;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);

	he = nvgpu_kzalloc(g, sizeof(*he));
	if (unlikely(!he)) {
		nvgpu_warn(g,
			"can't alloc new hash entry for context_ptr=%x pid=%d",
			context_ptr, pid);
		return -ENOMEM;
	}

	he->context_ptr = context_ptr;
	he->pid = pid;
	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_add(trace->pid_hash_table, &he->node, context_ptr);
	nvgpu_mutex_release(&trace->hash_lock);
	return 0;
}

static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
{
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"freeing hash entry context_ptr=%x", context_ptr);

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
		context_ptr) {
		if (ent->context_ptr == context_ptr) {
			hash_del(&ent->node);
			gk20a_dbg(gpu_dbg_ctxsw,
				"freed hash entry=%p context_ptr=%x", ent,
				ent->context_ptr);
			nvgpu_kfree(g, ent);
			break;
		}
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
{
	u32 bkt;
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
		hash_del(&ent->node);
		nvgpu_kfree(g, ent);
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
{
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t pid = 0;

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
		if (ent->context_ptr == context_ptr) {
			gk20a_dbg(gpu_dbg_ctxsw,
				"found context_ptr=%x -> pid=%d",
				ent->context_ptr, ent->pid);
			pid = ent->pid;
			break;
		}
	}
	nvgpu_mutex_release(&trace->hash_lock);

	return pid;
}
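
/*
 * Usage sketch for the pid hash above (values are hypothetical): on bind,
 * gk20a_fecs_trace_bind_channel() records the channel's thread group id,
 * e.g.
 *
 *   gk20a_fecs_trace_hash_add(g, 0x1234, 4242);
 *
 * and gk20a_fecs_trace_ring_read() later maps the context_ptr found in
 * each FECS record back to that pid; an unknown context_ptr yields pid 0.
 * Key collisions are resolved by comparing context_ptr while walking the
 * bucket.
 */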

/*
 * Converts HW entry format to userspace-facing format and pushes it to the
 * queue.
 */
static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
{
	int i;
	struct nvgpu_ctxsw_trace_entry entry = { };
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t cur_pid;
	pid_t new_pid;

	/* for now, only one VM */
	const int vmid = 0;

	struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(
		trace, index);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"consuming record trace=%p read=%d record=%p", trace, index, r);

	if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
		nvgpu_warn(g,
			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
			trace, index, r, r->magic_lo, r->magic_hi);
		return -EINVAL;
	}

	/*
	 * Clear magic_hi to detect cases where the CPU could read the write
	 * index before the FECS record is actually written to DRAM. This
	 * should not happen, as we force FECS writes to SYSMEM by reading
	 * through PRAMIN.
	 */
	r->magic_hi = 0;

	cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
	new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
		r->context_ptr, cur_pid, r->new_context_ptr, new_pid);

	entry.context_id = r->context_id;
	entry.vmid = vmid;

	/* break out FECS record into trace events */
	for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
		entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
		entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;

		gk20a_dbg(gpu_dbg_ctxsw,
			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
			entry.tag, entry.timestamp, r->context_id,
			r->new_context_id);

		switch (entry.tag) {
		case NVGPU_CTXSW_TAG_RESTORE_START:
		case NVGPU_CTXSW_TAG_CONTEXT_START:
			entry.context_id = r->new_context_id;
			entry.pid = new_pid;
			break;

		case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
		case NVGPU_CTXSW_TAG_FE_ACK:
		case NVGPU_CTXSW_TAG_FE_ACK_WFI:
		case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
		case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
		case NVGPU_CTXSW_TAG_FE_ACK_CILP:
		case NVGPU_CTXSW_TAG_SAVE_END:
			entry.context_id = r->context_id;
			entry.pid = cur_pid;
			break;

		default:
			/* tags are not guaranteed to start at the beginning */
			WARN_ON(entry.tag && (entry.tag != NVGPU_CTXSW_TAG_INVALID_TIMESTAMP));
			continue;
		}

		gk20a_dbg(gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
			entry.tag, entry.context_id, entry.pid);

		if (!entry.context_id)
			continue;

		gk20a_ctxsw_trace_write(g, &entry);
	}

	gk20a_ctxsw_trace_wake_up(g, vmid);
	return 0;
}

int gk20a_fecs_trace_poll(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	int read = 0;
	int write = 0;
	int cnt;
	int err;

	err = gk20a_busy(g);
	if (unlikely(err))
		return err;

	nvgpu_mutex_acquire(&trace->poll_lock);
	write = gk20a_fecs_trace_get_write_index(g);
	if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
		nvgpu_err(g,
			"failed to acquire write index, write=%d", write);
		err = write;
		goto done;
	}

	read = gk20a_fecs_trace_get_read_index(g);

	cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
	if (!cnt)
		goto done;

	gk20a_dbg(gpu_dbg_ctxsw,
		"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
		read, gk20a_fecs_trace_get_read_index(g), write, cnt);

	/* Ensure all FECS writes have made it to SYSMEM */
	g->ops.mm.fb_flush(g);

	while (read != write) {
		/* Ignore error code, as we want to consume all records */
		(void)gk20a_fecs_trace_ring_read(g, read);

		/* Get to next record. */
		read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
	}

	/* Ensure FECS records have been updated before incrementing the read index */
	nvgpu_smp_wmb();
	gk20a_fecs_trace_set_read_index(g, read);

done:
	nvgpu_mutex_release(&trace->poll_lock);
	gk20a_idle(g);
	return err;
}
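
/*
 * CIRC_CNT() above is the usual power-of-two ring arithmetic,
 * (write - read) & (size - 1). Worked example: write=3, read=60, size=64
 * gives (3 - 60) & 63 = 7 pending records, i.e. indices 60..63 then 0..2.
 */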

static int gk20a_fecs_trace_periodic_polling(void *arg)
{
	struct gk20a *g = (struct gk20a *)arg;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	pr_info("%s: running\n", __func__);

	while (!nvgpu_thread_should_stop(&trace->poll_task)) {
		nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
				GK20A_FECS_TRACE_FRAME_PERIOD_US * 2);

		gk20a_fecs_trace_poll(g);
	}

	return 0;
}
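
/*
 * Polling cadence: GK20A_FECS_TRACE_FRAME_PERIOD_US is 1000000/60, about
 * 16667 us, so the task above wakes roughly once per 60 Hz display frame;
 * the nvgpu_usleep_range() call lets the wakeup land anywhere between one
 * and two frame periods.
 */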

static int gk20a_fecs_trace_alloc_ring(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS
			* ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
			&trace->trace_buf);
}

static void gk20a_fecs_trace_free_ring(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_dma_free(g, &trace->trace_buf);
}

#ifdef CONFIG_DEBUG_FS
/*
 * The sequence iterator functions. We simply use the count of the
 * next line as our internal position.
 */
static void *gk20a_fecs_trace_debugfs_ring_seq_start(
		struct seq_file *s, loff_t *pos)
{
	if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
		return NULL;

	return pos;
}

static void *gk20a_fecs_trace_debugfs_ring_seq_next(
		struct seq_file *s, void *v, loff_t *pos)
{
	++(*pos);
	if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
		return NULL;
	return pos;
}

static void gk20a_fecs_trace_debugfs_ring_seq_stop(
		struct seq_file *s, void *v)
{
}

static int gk20a_fecs_trace_debugfs_ring_seq_show(
		struct seq_file *s, void *v)
{
	loff_t *pos = (loff_t *) v;
	struct gk20a *g = *(struct gk20a **)s->private;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos);
	int i;
	const u32 invalid_tag =
		ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
	u32 tag;
	u64 timestamp;

	seq_printf(s, "record #%lld (%p)\n", *pos, r);
	seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo);
	seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi);
	if (gk20a_fecs_trace_is_valid_record(r)) {
		seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr);
		seq_printf(s, "\tcontext_id=%08x\n", r->context_id);
		seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr);
		seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id);
		for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
			tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
			if (tag == invalid_tag)
				continue;
			timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
			timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
			seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp);
		}
	}
	return 0;
}

/*
 * Tie them all together into a set of seq_operations.
 */
static const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = {
	.start = gk20a_fecs_trace_debugfs_ring_seq_start,
	.next = gk20a_fecs_trace_debugfs_ring_seq_next,
	.stop = gk20a_fecs_trace_debugfs_ring_seq_stop,
	.show = gk20a_fecs_trace_debugfs_ring_seq_show
};

/*
 * Set up the file operations for our debugfs file. In this case, all we
 * need is an open function which sets up the sequence ops.
 */

static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode,
		struct file *file)
{
	struct gk20a **p;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops,
			sizeof(struct gk20a *));
	if (!p)
		return -ENOMEM;

	*p = (struct gk20a *)inode->i_private;
	return 0;
};

/*
 * The file operations structure contains our open function along with a
 * set of the canned seq_ ops.
 */
static const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_ctxsw_debugfs_ring_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private
};

static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val)
{
	*val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg);
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops,
		gk20a_fecs_trace_debugfs_read, NULL, "%llu\n");

static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val)
{
	*val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg);
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops,
		gk20a_fecs_trace_debugfs_write, NULL, "%llu\n");

static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	debugfs_create_file("ctxsw_trace_read", 0600, l->debugfs, g,
		&gk20a_fecs_trace_debugfs_read_fops);
	debugfs_create_file("ctxsw_trace_write", 0600, l->debugfs, g,
		&gk20a_fecs_trace_debugfs_write_fops);
	debugfs_create_file("ctxsw_trace_ring", 0600, l->debugfs, g,
		&gk20a_fecs_trace_debugfs_ring_fops);
}
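
/*
 * The three nodes land under the per-GPU debugfs root (l->debugfs). With
 * a typical debugfs mount the ring can then be inspected from userspace,
 * e.g. (the exact path is setup-dependent):
 *
 *   cat /sys/kernel/debug/<gpu>/ctxsw_trace_read    # SW read index
 *   cat /sys/kernel/debug/<gpu>/ctxsw_trace_write   # FECS write index
 *   cat /sys/kernel/debug/<gpu>/ctxsw_trace_ring    # decoded records
 *
 * Note that opening ctxsw_trace_ring requires CAP_SYS_ADMIN (see the open
 * handler above).
 */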

#else

static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
{
}

#endif /* CONFIG_DEBUG_FS */

int gk20a_fecs_trace_init(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace;
	int err;

	trace = nvgpu_kzalloc(g, sizeof(struct gk20a_fecs_trace));
	if (!trace) {
		nvgpu_warn(g, "failed to allocate fecs_trace");
		return -ENOMEM;
	}
	g->fecs_trace = trace;

	err = nvgpu_mutex_init(&trace->poll_lock);
	if (err)
		goto clean;
	err = nvgpu_mutex_init(&trace->hash_lock);
	if (err)
		goto clean_poll_lock;

	BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
	err = gk20a_fecs_trace_alloc_ring(g);
	if (err) {
		nvgpu_warn(g, "failed to allocate FECS ring");
		goto clean_hash_lock;
	}

	hash_init(trace->pid_hash_table);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);

	gk20a_fecs_trace_debugfs_init(g);

	trace->init = true;

	return 0;

clean_hash_lock:
	nvgpu_mutex_destroy(&trace->hash_lock);
clean_poll_lock:
	nvgpu_mutex_destroy(&trace->poll_lock);
clean:
	nvgpu_kfree(g, trace);
	g->fecs_trace = NULL;
	return err;
}

int gk20a_fecs_trace_bind_channel(struct gk20a *g,
		struct channel_gk20a *ch)
{
	/*
	 * map our circ_buf to the context space and store the GPU VA
	 * in the context header.
	 */

	u32 lo;
	u32 hi;
	u64 pa;
	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
	pid_t pid;
	u32 aperture;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"chid=%d context_ptr=%x inst_block=%llx",
		ch->chid, context_ptr,
		nvgpu_inst_block_addr(g, &ch->inst_block));

	if (!trace)
		return -ENOMEM;

	pa = nvgpu_inst_block_addr(g, &trace->trace_buf);
	if (!pa)
		return -ENOMEM;
	aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());

	if (nvgpu_mem_begin(g, mem))
		return -ENOMEM;

	lo = u64_lo32(pa);
	hi = u64_hi32(pa);

	gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
		lo, GK20A_FECS_TRACE_NUM_RECORDS);

	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
		lo);
	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
		aperture);
	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
			GK20A_FECS_TRACE_NUM_RECORDS));

	nvgpu_mem_end(g, mem);

	/*
	 * The pid (process identifier) in user space corresponds to the tgid
	 * (thread group id) in kernel space.
	 */
	if (gk20a_is_channel_marked_as_tsg(ch))
		pid = tsg_gk20a_from_ch(ch)->tgid;
	else
		pid = ch->tgid;
	gk20a_fecs_trace_hash_add(g, context_ptr, pid);

	return 0;
}
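
/*
 * Address split illustration for the writes above: the low 32 bits of the
 * buffer address go to ..._timestamp_buffer_ptr_o() and the high bits,
 * OR'ed with the aperture flag, to ..._timestamp_buffer_ptr_hi_o(). For
 * example (hypothetical address), pa = 0x123456000 splits into
 * lo = u64_lo32(pa) = 0x23456000 and hi = u64_hi32(pa) = 0x1.
 */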

int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
{
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);

	if (g->fecs_trace) {
		gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
			"ch=%p context_ptr=%x", ch, context_ptr);

		if (g->ops.fecs_trace.is_enabled(g)) {
			if (g->ops.fecs_trace.flush)
				g->ops.fecs_trace.flush(g);
			gk20a_fecs_trace_poll(g);
		}
		gk20a_fecs_trace_hash_del(g, context_ptr);
	}
	return 0;
}

int gk20a_fecs_trace_reset(struct gk20a *g)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	if (!g->ops.fecs_trace.is_enabled(g))
		return 0;

	gk20a_fecs_trace_poll(g);
	return gk20a_fecs_trace_set_read_index(g, 0);
}

int gk20a_fecs_trace_deinit(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	if (!trace->init)
		return 0;

	nvgpu_thread_stop(&trace->poll_task);
	gk20a_fecs_trace_free_ring(g);
	gk20a_fecs_trace_free_hash_table(g);

	nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
	nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);

	nvgpu_kfree(g, g->fecs_trace);
	g->fecs_trace = NULL;
	return 0;
}

int gk20a_gr_max_entries(struct gk20a *g,
		struct nvgpu_ctxsw_trace_filter *filter)
{
	int n;
	int tag;

	/* Compute number of entries per record, with given filter */
	for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
		n += (NVGPU_CTXSW_FILTER_ISSET(tag, filter) != 0);

	/* Return max number of entries generated for the whole ring */
	return n * GK20A_FECS_TRACE_NUM_RECORDS;
}
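
/*
 * Worked example: with 3 tags set in the filter, each record can emit up
 * to 3 entries, so a full 64-record ring is bounded by 3 * 64 = 192
 * entries.
 */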

int gk20a_fecs_trace_enable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	int write;
	int err = 0;

	if (!trace)
		return -EINVAL;

	if (nvgpu_thread_is_running(&trace->poll_task))
		return 0;

	/* drop data in hw buffer */
	if (g->ops.fecs_trace.flush)
		g->ops.fecs_trace.flush(g);
	write = gk20a_fecs_trace_get_write_index(g);
	gk20a_fecs_trace_set_read_index(g, write);

	err = nvgpu_thread_create(&trace->poll_task, g,
			gk20a_fecs_trace_periodic_polling, __func__);
	if (err) {
		nvgpu_warn(g, "failed to create FECS polling task");
		return err;
	}

	return 0;
}

int gk20a_fecs_trace_disable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	if (nvgpu_thread_is_running(&trace->poll_task))
		nvgpu_thread_stop(&trace->poll_task);

	return -EPERM;
}

bool gk20a_fecs_trace_is_enabled(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	return (trace && nvgpu_thread_is_running(&trace->poll_task));
}
#endif /* CONFIG_GK20A_CTXSW_TRACE */