author	Anton Vorontsov <avorontsov@nvidia.com>	2015-08-19 17:27:51 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-03-23 10:48:47 -0400
commit	1c40d09c4c9c011c1318c328c0b4b6b17d1f537e (patch)
tree	8b93fcd00739f9ada9302f06175278c9cb1d6785 /drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
parent	82da6ed595a87c8a3038eecd75880ab21dd4c5de (diff)
gpu: nvgpu: Add support for FECS ctxsw tracing

bug 1648908

This commit adds support for FECS ctxsw tracing. Code is compiled
conditionally under CONFIG_GK20A_CTXSW_TRACE.

This feature requires an updated FECS ucode that writes one record to a
ring buffer on each context switch. On the RM/Kernel side, the GPU driver
reads records from the master ring buffer and generates trace entries
into a user-facing VM ring buffer. For each record in the master ring
buffer, RM/Kernel has to retrieve the vmid+pid of the user process that
submitted the related work.

Features currently implemented:
- master ring buffer allocation
- debugfs to dump master ring buffer
- FECS record per context switch (with both current and new contexts)
- dedicated device for ctxsw tracing (access to VM ring buffer)
- SOF generation (and access to PTIMER)
- VM ring buffer allocation and reconfiguration
- enable/disable tracing at user level
- event-based trace filtering
- context_ptr to vmid+pid mapping
- read system call for ctxsw dev
- mmap system call for ctxsw dev (direct access to VM ring buffer)
- poll system call for ctxsw dev
- save/restore register on ELPG/CG6
- separate user ring from FECS ring handling

Features requiring ucode changes:
- enable/disable tracing at FECS level
- actual busy time on engine (bug 1642354)
- master ring buffer threshold interrupt (P1)
- API for GPU to CPU timestamp conversion (P1)
- vmid/pid/uid based filtering (P1)

Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1022737
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c	763
1 file changed, 763 insertions, 0 deletions
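
The new ctxsw device is consumed with plain read()/mmap()/poll() calls. As a
minimal userspace sketch (the device node path is an assumption here, and the
entry layout is the struct nvgpu_ctxsw_trace_entry from <uapi/linux/nvgpu.h>
that this file fills in), a reader could look like:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* assumes installed nvgpu uapi headers */

int main(void)
{
	struct nvgpu_ctxsw_trace_entry e[64];
	ssize_t n;
	/* device node name is an assumption, not defined in this change */
	int fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY);

	if (fd < 0)
		return 1;

	/* each read() drains whole trace entries from the VM ring buffer */
	while ((n = read(fd, e, sizeof(e))) > 0) {
		size_t i;

		for (i = 0; i < n / sizeof(e[0]); i++)
			printf("tag=%02x vmid=%u pid=%llu ts=%llx\n",
			       (unsigned)e[i].tag, (unsigned)e[i].vmid,
			       (unsigned long long)e[i].pid,
			       (unsigned long long)e[i].timestamp);
	}

	close(fd);
	return 0;
}
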
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
new file mode 100644
index 00000000..bac36403
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -0,0 +1,763 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <asm/barrier.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/nvgpu.h>
#include <linux/hashtable.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <uapi/linux/nvgpu.h>
#include "ctxsw_trace_gk20a.h"
#include "fecs_trace_gk20a.h"
#include "gk20a.h"
#include "gr_gk20a.h"
#include "hw_ctxsw_prog_gk20a.h"
#include "hw_gr_gk20a.h"

/*
 * If the HW circular buffer is getting too many "buffer full" conditions,
 * increasing this constant should help (it drives the size of Linux's
 * internal buffer).
 */
#define GK20A_FECS_TRACE_NUM_RECORDS		(1 << 6)
#define GK20A_FECS_TRACE_HASH_BITS		8 /* 2^8 */
#define GK20A_FECS_TRACE_FRAME_PERIOD_NS	(1000000000ULL/60ULL)
#define GK20A_FECS_TRACE_PTIMER_SHIFT		5

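/*
 * Unit notes: GK20A_FECS_TRACE_FRAME_PERIOD_NS is 1s/60, so the polling
 * thread below wakes at roughly display frame rate (~60 Hz).
 * GK20A_FECS_TRACE_PTIMER_SHIFT undoes the 5-bit right shift the ucode
 * applies to raw PTIMER values before storing them in a record; the 32 ns
 * tick granularity this implies is an assumption based on gk20a-class
 * PTIMER, not something this file asserts.
 */
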
struct gk20a_fecs_trace_record {
	u32 magic_lo;
	u32 magic_hi;
	u32 context_id;
	u32 context_ptr;
	u32 new_context_id;
	u32 new_context_ptr;
	u64 ts[];
};
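
/*
 * Layout note: a record occupies
 * ctxsw_prog_record_timestamp_record_size_in_bytes_v() bytes; the fixed
 * header above is followed by a flexible array of 64-bit tag+timestamp
 * words that fills the remainder (see gk20a_fecs_trace_num_ts() below).
 * magic_hi alone identifies a valid record, since experimental ucode
 * reuses magic_lo as a sequence number.
 */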

struct gk20a_fecs_trace_hash_ent {
	u32 context_ptr;
	pid_t pid;
	struct hlist_node node;
};

struct gk20a_fecs_trace {
	struct mem_desc trace_buf;
	DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
	struct mutex hash_lock;
	struct mutex poll_lock;
	u64 sof;
	u32 sof_mask; /* did we already send a SOF for this VM */

	struct task_struct *poll_task;
};

#ifdef CONFIG_GK20A_CTXSW_TRACE
static inline u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
}

static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
{
	return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
}
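
/*
 * Each ts[] word packs a tag into the upper bits of its high 32-bit half
 * and a PTIMER-derived timestamp into the remaining bits; the two helpers
 * above split one word into those fields.
 */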

static u32 gk20a_fecs_trace_fecs_context_ptr(struct channel_gk20a *ch)
{
	return (u32) (sg_phys(ch->inst_block.sgt->sgl) >> 12LL);
}

static inline int gk20a_fecs_trace_num_ts(void)
{
	return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
		- sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
}

struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
	struct gk20a_fecs_trace *trace, int idx)
{
	return (struct gk20a_fecs_trace_record *)
		((u8 *) trace->trace_buf.cpu_va
		+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
}

static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
{
	/*
	 * testing magic_hi should suffice. magic_lo is sometimes used
	 * as a sequence number in experimental ucode.
	 */
	return (r->magic_hi
		== ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
}

static int gk20a_fecs_trace_get_read_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox1_r()));
}

static int gk20a_fecs_trace_get_write_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox0_r()));
}

static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
{
	gk20a_dbg(gpu_dbg_ctxsw, "set read=%d", index);
	return gr_gk20a_elpg_protected_call(g,
			(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
}
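
/*
 * The ring indices live in FECS mailbox registers (mailbox0 = write,
 * mailbox1 = read), so every access above is wrapped in
 * gr_gk20a_elpg_protected_call() to keep the GR engine powered while the
 * register is touched.
 */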

void gk20a_fecs_trace_hash_dump(struct gk20a *g)
{
	u32 bkt;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table");

	mutex_lock(&trace->hash_lock);
	hash_for_each(trace->pid_hash_table, bkt, ent, node) {
		gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
			ent, bkt, ent->context_ptr, ent->pid);
	}
	mutex_unlock(&trace->hash_lock);
}

static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
{
	struct gk20a_fecs_trace_hash_ent *he;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);

	he = kzalloc(sizeof(*he), GFP_KERNEL);
	if (unlikely(!he)) {
		gk20a_warn(dev_from_gk20a(g),
			"can't alloc new hash entry for context_ptr=%x pid=%d",
			context_ptr, pid);
		return -ENOMEM;
	}

	he->context_ptr = context_ptr;
	he->pid = pid;
	mutex_lock(&trace->hash_lock);
	hash_add(trace->pid_hash_table, &he->node, context_ptr);
	mutex_unlock(&trace->hash_lock);
	return 0;
}

static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
{
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"freeing hash entry context_ptr=%x", context_ptr);

	mutex_lock(&trace->hash_lock);
	hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
		context_ptr) {
		if (ent->context_ptr == context_ptr) {
			hash_del(&ent->node);
			gk20a_dbg(gpu_dbg_ctxsw,
				"freed hash entry=%p context_ptr=%x", ent,
				ent->context_ptr);
			kfree(ent);
			break;
		}
	}
	mutex_unlock(&trace->hash_lock);
}

static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
{
	u32 bkt;
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);

	mutex_lock(&trace->hash_lock);
	hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
		hash_del(&ent->node);
		kfree(ent);
	}
	mutex_unlock(&trace->hash_lock);
}

static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
{
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t pid = 0;

	mutex_lock(&trace->hash_lock);
	hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
		if (ent->context_ptr == context_ptr) {
			gk20a_dbg(gpu_dbg_ctxsw,
				"found context_ptr=%x -> pid=%d",
				ent->context_ptr, ent->pid);
			pid = ent->pid;
			break;
		}
	}
	mutex_unlock(&trace->hash_lock);

	return pid;
}

/*
 * Converts HW entry format to userspace-facing format and pushes it to the
 * queue.
 */
static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
{
	int i;
	struct nvgpu_ctxsw_trace_entry entry = { };
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t cur_pid;
	pid_t new_pid;

	/* for now, only one VM */
	const int vmid = 0;

	struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(
		trace, index);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"consuming record trace=%p read=%d record=%p", trace, index, r);

	if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
		gk20a_warn(dev_from_gk20a(g),
			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
			trace, index, r, r->magic_lo, r->magic_hi);
		return -EINVAL;
	}

	cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
	new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
		r->context_ptr, cur_pid, r->new_context_ptr, new_pid);

	entry.context_id = r->context_id;
	entry.vmid = vmid;

	/* insert SOF event if needed */
	if (!(trace->sof_mask & BIT(vmid))) {
		entry.tag = NVGPU_CTXSW_TAG_SOF;
		entry.timestamp = trace->sof;
		entry.context_id = 0;
		entry.pid = 0;

		gk20a_dbg(gpu_dbg_ctxsw, "SOF time=%llx", entry.timestamp);
		gk20a_ctxsw_trace_write(g, &entry);
		trace->sof_mask |= BIT(vmid);
	}

	/* break out FECS record into trace events */
	for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
		entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
		entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;

		gk20a_dbg(gpu_dbg_ctxsw,
			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
			entry.tag, entry.timestamp, r->context_id,
			r->new_context_id);

		switch (entry.tag) {
		case NVGPU_CTXSW_TAG_RESTORE_START:
		case NVGPU_CTXSW_TAG_CONTEXT_START:
			entry.context_id = r->new_context_id;
			entry.pid = new_pid;
			break;

		case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
		case NVGPU_CTXSW_TAG_FE_ACK:
		case NVGPU_CTXSW_TAG_FE_ACK_WFI:
		case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
		case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
		case NVGPU_CTXSW_TAG_FE_ACK_CILP:
		case NVGPU_CTXSW_TAG_SAVE_END:
			entry.context_id = r->context_id;
			entry.pid = cur_pid;
			break;

		default:
			/* tags are not guaranteed to start at the beginning */
			WARN_ON(entry.tag && (entry.tag != NVGPU_CTXSW_TAG_INVALID_TIMESTAMP));
			continue;
		}

		gk20a_dbg(gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
			entry.tag, entry.context_id, entry.pid);

		if (!entry.context_id)
			continue;

		gk20a_ctxsw_trace_write(g, &entry);
	}

	gk20a_ctxsw_trace_wake_up(g, vmid);
	return 0;
}
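
/*
 * sof_mask tracking: gk20a_fecs_trace_poll() clears sof_mask once per poll
 * pass, so the first record consumed for a VM in each pass emits a single
 * synthetic SOF entry stamped with the PTIMER time captured at the end of
 * the previous pass.
 */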

static int gk20a_fecs_trace_poll(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	int read = 0;
	int write = 0;
	int cnt;
	int err;

	err = gk20a_busy(g->dev);
	if (unlikely(err))
		return err;

	mutex_lock(&trace->poll_lock);
	write = gk20a_fecs_trace_get_write_index(g);
	if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
		gk20a_err(dev_from_gk20a(g),
			"failed to acquire write index, write=%d", write);
		err = write;
		goto done;
	}

	read = gk20a_fecs_trace_get_read_index(g);

	cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
	if (!cnt)
		goto done;

	gk20a_dbg(gpu_dbg_ctxsw,
		"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
		read, gk20a_fecs_trace_get_read_index(g), write, cnt);

	/* we did not send any SOF yet */
	trace->sof_mask = 0;

	/* consume all records */
	while (read != write) {
		gk20a_fecs_trace_ring_read(g, read);

		/* Get to next record. */
		read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
		gk20a_fecs_trace_set_read_index(g, read);
	}

done:
	/*
	 * OK, we read out all the entries... a new "frame" starts here.
	 * We remember the Start Of Frame time and insert it on the next
	 * iteration.
	 */
	trace->sof = gk20a_read_ptimer(g);

	mutex_unlock(&trace->poll_lock);
	gk20a_idle(g->dev);
	return err;
}

static int gk20a_fecs_trace_periodic_polling(void *arg)
{
	struct gk20a *g = (struct gk20a *)arg;
	struct timespec ts = ns_to_timespec(GK20A_FECS_TRACE_FRAME_PERIOD_NS);

	pr_info("%s: running\n", __func__);

	while (!kthread_should_stop()) {
		hrtimer_nanosleep(&ts, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
		gk20a_fecs_trace_poll(g);
	}

	return 0;
}
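
/*
 * The poll thread sleeps for GK20A_FECS_TRACE_FRAME_PERIOD_NS between
 * iterations, i.e. it drains the FECS ring at ~60 Hz. There is no
 * threshold interrupt yet (listed as future ucode work in the commit
 * message), so periodic polling is the only way records get consumed.
 */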

static int gk20a_fecs_trace_alloc_ring(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	return gk20a_gmmu_alloc(g, GK20A_FECS_TRACE_NUM_RECORDS
			* ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
			&trace->trace_buf);
}

static void gk20a_fecs_trace_free_ring(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_gmmu_free(g, &trace->trace_buf);
}

#ifdef CONFIG_DEBUG_FS
/*
 * The sequence iterator functions. We simply use the count of the
 * next line as our internal position.
 */
static void *gk20a_fecs_trace_debugfs_ring_seq_start(
		struct seq_file *s, loff_t *pos)
{
	if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
		return NULL;

	return pos;
}

static void *gk20a_fecs_trace_debugfs_ring_seq_next(
		struct seq_file *s, void *v, loff_t *pos)
{
	++(*pos);
	if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
		return NULL;
	return pos;
}

static void gk20a_fecs_trace_debugfs_ring_seq_stop(
		struct seq_file *s, void *v)
{
}

static int gk20a_fecs_trace_debugfs_ring_seq_show(
		struct seq_file *s, void *v)
{
	loff_t *pos = (loff_t *) v;
	struct gk20a *g = *(struct gk20a **)s->private;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos);
	int i;
	const u32 invalid_tag =
		ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
	u32 tag;
	u64 timestamp;

	seq_printf(s, "record #%lld (%p)\n", *pos, r);
	seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo);
	seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi);
	if (gk20a_fecs_trace_is_valid_record(r)) {
		seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr);
		seq_printf(s, "\tcontext_id=%08x\n", r->context_id);
		seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr);
		seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id);
		for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
			tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
			if (tag == invalid_tag)
				continue;
			timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
			timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
			seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp);
		}
	}
	return 0;
}

/*
 * Tie them all together into a set of seq_operations.
 */
const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = {
	.start = gk20a_fecs_trace_debugfs_ring_seq_start,
	.next = gk20a_fecs_trace_debugfs_ring_seq_next,
	.stop = gk20a_fecs_trace_debugfs_ring_seq_stop,
	.show = gk20a_fecs_trace_debugfs_ring_seq_show
};

/*
 * Time to set up the file operations for our debugfs file. In this case,
 * all we need is an open function which sets up the sequence ops.
 */

static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode,
		struct file *file)
{
	struct gk20a **p;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops,
			sizeof(struct gk20a *));
	if (!p)
		return -ENOMEM;

	*p = (struct gk20a *)inode->i_private;
	return 0;
}

/*
 * The file operations structure contains our open function along with
 * the set of canned seq_ ops.
 */
const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_ctxsw_debugfs_ring_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private
};

static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val)
{
	*val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg);
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops,
		gk20a_fecs_trace_debugfs_read, NULL, "%llu\n");

static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val)
{
	*val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg);
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops,
		gk20a_fecs_trace_debugfs_write, NULL, "%llu\n");

static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *plat = platform_get_drvdata(g->dev);

	debugfs_create_file("ctxsw_trace_read", 0600, plat->debugfs, g,
		&gk20a_fecs_trace_debugfs_read_fops);
	debugfs_create_file("ctxsw_trace_write", 0600, plat->debugfs, g,
		&gk20a_fecs_trace_debugfs_write_fops);
	debugfs_create_file("ctxsw_trace_ring", 0600, plat->debugfs, g,
		&gk20a_fecs_trace_debugfs_ring_fops);
}

static void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g)
{
	struct gk20a_platform *plat = platform_get_drvdata(g->dev);

	debugfs_remove_recursive(plat->debugfs);
}

#else

static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
{
}

static inline void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g)
{
}

#endif /* CONFIG_DEBUG_FS */

static int gk20a_fecs_trace_init(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace;
	int err;

	trace = kzalloc(sizeof(struct gk20a_fecs_trace), GFP_KERNEL);
	if (!trace) {
		gk20a_warn(dev_from_gk20a(g), "failed to allocate fecs_trace");
		return -ENOMEM;
	}
	g->fecs_trace = trace;

	BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
	err = gk20a_fecs_trace_alloc_ring(g);
	if (err) {
		gk20a_warn(dev_from_gk20a(g), "failed to allocate FECS ring");
		goto clean;
	}

	mutex_init(&trace->poll_lock);
	mutex_init(&trace->hash_lock);
	hash_init(trace->pid_hash_table);

	gk20a_fecs_trace_debugfs_init(g);
	return 0;

clean:
	kfree(trace);
	g->fecs_trace = NULL;
	return err;
}

static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
		struct channel_gk20a *ch)
{
	/*
	 * map our circ_buf to the context space and store the GPU VA
	 * in the context header.
	 */

	u32 lo;
	u32 hi;
	phys_addr_t pa;
	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	void *ctx_ptr;
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
			"hw_chid=%d context_ptr=%x inst_block=%llx",
			ch->hw_chid, context_ptr, gk20a_mem_phys(&ch->inst_block));

	if (!trace)
		return -ENOMEM;

	pa = gk20a_mem_phys(&trace->trace_buf);
	if (!pa)
		return -ENOMEM;

	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0,
			pgprot_writecombine(PAGE_KERNEL));
	if (!ctx_ptr)
		return -ENOMEM;

	lo = u64_lo32(pa);
	hi = u64_hi32(pa);

	gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
		lo, GK20A_FECS_TRACE_NUM_RECORDS);

	gk20a_mem_wr32(ctx_ptr
		+ ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
		0, lo);
	gk20a_mem_wr32(ctx_ptr
		+ ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
		0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi));
	gk20a_mem_wr32(ctx_ptr
		+ ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
		0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
			GK20A_FECS_TRACE_NUM_RECORDS));

	vunmap(ctx_ptr);
	gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid);

	return 0;
}
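
/*
 * Note: despite the "GPU VA" wording in the comment above, what gets
 * written into the context header is the buffer's physical address
 * (gk20a_mem_phys()), split into lo/hi words, plus the ring size. The
 * context_ptr -> pid hash entry added at the end is what later lets
 * gk20a_fecs_trace_ring_read() attribute records to a process.
 */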

static int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
{
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"ch=%p context_ptr=%x", ch, context_ptr);

	if (g->ops.fecs_trace.flush)
		g->ops.fecs_trace.flush(g);
	gk20a_fecs_trace_poll(g);
	gk20a_fecs_trace_hash_del(g, context_ptr);
	return 0;
}

static int gk20a_fecs_trace_reset(struct gk20a *g)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	if (g->ops.fecs_trace.flush)
		g->ops.fecs_trace.flush(g);
	gk20a_fecs_trace_poll(g);
	return gk20a_fecs_trace_set_read_index(g, 0);
}

static int gk20a_fecs_trace_deinit(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	gk20a_fecs_trace_debugfs_cleanup(g);
	/* poll_task is only set while tracing is enabled */
	if (trace->poll_task)
		kthread_stop(trace->poll_task);
	gk20a_fecs_trace_free_ring(g);
	gk20a_fecs_trace_free_hash_table(g);

	kfree(g->fecs_trace);
	g->fecs_trace = NULL;
	return 0;
}

static int gk20a_gr_max_entries(struct gk20a *g,
		struct nvgpu_ctxsw_trace_filter *filter)
{
	int n;
	int tag;

	/* Compute number of entries per record, with given filter */
	for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
		n += (NVGPU_CTXSW_FILTER_ISSET(tag, filter) != 0);

	/* Return max number of entries generated for the whole ring */
	return n * GK20A_FECS_TRACE_NUM_RECORDS;
}

static int gk20a_fecs_trace_enable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct task_struct *task;

	if (!trace->poll_task) {
		task = kthread_run(gk20a_fecs_trace_periodic_polling, g, __func__);
		if (unlikely(IS_ERR(task))) {
			gk20a_warn(dev_from_gk20a(g), "failed to create FECS polling task");
			return PTR_ERR(task);
		}
		trace->poll_task = task;
	}

	return 0;
}

static int gk20a_fecs_trace_disable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	if (trace->poll_task) {
		kthread_stop(trace->poll_task);
		trace->poll_task = NULL;
	}

	return -EPERM;
}

void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
{
	ops->fecs_trace.init = gk20a_fecs_trace_init;
	ops->fecs_trace.deinit = gk20a_fecs_trace_deinit;
	ops->fecs_trace.enable = gk20a_fecs_trace_enable;
	ops->fecs_trace.disable = gk20a_fecs_trace_disable;
	ops->fecs_trace.reset = gk20a_fecs_trace_reset;
	ops->fecs_trace.flush = NULL;
	ops->fecs_trace.poll = gk20a_fecs_trace_poll;
	ops->fecs_trace.bind_channel = gk20a_fecs_trace_bind_channel;
	ops->fecs_trace.unbind_channel = gk20a_fecs_trace_unbind_channel;
	ops->fecs_trace.max_entries = gk20a_gr_max_entries;
}
#else
void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
{
}
#endif /* CONFIG_GK20A_CTXSW_TRACE */