aboutsummaryrefslogtreecommitdiffstats
path: root/include/os/linux/ctxsw_trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'include/os/linux/ctxsw_trace.c')
-rw-r--r--include/os/linux/ctxsw_trace.c792
1 files changed, 792 insertions, 0 deletions
diff --git a/include/os/linux/ctxsw_trace.c b/include/os/linux/ctxsw_trace.c
new file mode 100644
index 0000000..2d36d9c
--- /dev/null
+++ b/include/os/linux/ctxsw_trace.c
@@ -0,0 +1,792 @@
1/*
2 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/wait.h>
18#include <linux/ktime.h>
19#include <linux/uaccess.h>
20#include <linux/poll.h>
21#include <trace/events/gk20a.h>
22#include <uapi/linux/nvgpu.h>
23#include <nvgpu/ctxsw_trace.h>
24#include <nvgpu/kmem.h>
25#include <nvgpu/log.h>
26#include <nvgpu/atomic.h>
27#include <nvgpu/barrier.h>
28#include <nvgpu/gk20a.h>
29#include <nvgpu/channel.h>
30
31#include "gk20a/gr_gk20a.h"
32#include "gk20a/fecs_trace_gk20a.h"
33
34#include "platform_gk20a.h"
35#include "os_linux.h"
36#include "ctxsw_trace.h"
37
38#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
39#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
40
41#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
42
43/* Userland-facing FIFO (one global + eventually one per VM) */
44struct gk20a_ctxsw_dev {
45 struct gk20a *g;
46
47 struct nvgpu_ctxsw_ring_header *hdr;
48 struct nvgpu_gpu_ctxsw_trace_entry *ents;
49 struct nvgpu_gpu_ctxsw_trace_filter filter;
50 bool write_enabled;
51 struct nvgpu_cond readout_wq;
52 size_t size;
53 u32 num_ents;
54
55 nvgpu_atomic_t vma_ref;
56
57 struct nvgpu_mutex write_lock;
58};
59
60
61struct gk20a_ctxsw_trace {
62 struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
63};
64
65static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
66{
67 return (hdr->write_idx == hdr->read_idx);
68}
69
70static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
71{
72 return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
73}
74
75static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
76{
77 return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
78}
79
80static void nvgpu_set_ctxsw_trace_entry(struct nvgpu_ctxsw_trace_entry *entry_dst,
81 struct nvgpu_gpu_ctxsw_trace_entry *entry_src)
82{
83 entry_dst->tag = entry_src->tag;
84 entry_dst->vmid = entry_src->vmid;
85 entry_dst->seqno = entry_src->seqno;
86 entry_dst->context_id = entry_src->context_id;
87 entry_dst->pid = entry_src->pid;
88 entry_dst->timestamp = entry_src->timestamp;
89}
90
91ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
92 loff_t *off)
93{
94 struct gk20a_ctxsw_dev *dev = filp->private_data;
95 struct gk20a *g = dev->g;
96 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
97 struct nvgpu_ctxsw_trace_entry __user *entry =
98 (struct nvgpu_ctxsw_trace_entry *) buf;
99 struct nvgpu_ctxsw_trace_entry user_entry;
100 size_t copied = 0;
101 int err;
102
103 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
104 "filp=%p buf=%p size=%zu", filp, buf, size);
105
106 nvgpu_mutex_acquire(&dev->write_lock);
107 while (ring_is_empty(hdr)) {
108 nvgpu_mutex_release(&dev->write_lock);
109 if (filp->f_flags & O_NONBLOCK)
110 return -EAGAIN;
111 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
112 !ring_is_empty(hdr), 0);
113 if (err)
114 return err;
115 nvgpu_mutex_acquire(&dev->write_lock);
116 }
117
118 while (size >= sizeof(struct nvgpu_gpu_ctxsw_trace_entry)) {
119 if (ring_is_empty(hdr))
120 break;
121
122 nvgpu_set_ctxsw_trace_entry(&user_entry, &dev->ents[hdr->read_idx]);
123 if (copy_to_user(entry, &user_entry,
124 sizeof(*entry))) {
125 nvgpu_mutex_release(&dev->write_lock);
126 return -EFAULT;
127 }
128
129 hdr->read_idx++;
130 if (hdr->read_idx >= hdr->num_ents)
131 hdr->read_idx = 0;
132
133 entry++;
134 copied += sizeof(*entry);
135 size -= sizeof(*entry);
136 }
137
138 nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
139 hdr->read_idx);
140
141 *off = hdr->read_idx;
142 nvgpu_mutex_release(&dev->write_lock);
143
144 return copied;
145}
146
147static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
148{
149 struct gk20a *g = dev->g;
150
151 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
152 nvgpu_mutex_acquire(&dev->write_lock);
153 dev->write_enabled = true;
154 nvgpu_mutex_release(&dev->write_lock);
155 dev->g->ops.fecs_trace.enable(dev->g);
156 return 0;
157}
158
159static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
160{
161 struct gk20a *g = dev->g;
162
163 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
164 dev->g->ops.fecs_trace.disable(dev->g);
165 nvgpu_mutex_acquire(&dev->write_lock);
166 dev->write_enabled = false;
167 nvgpu_mutex_release(&dev->write_lock);
168 return 0;
169}
170
171static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
172 size_t size)
173{
174 struct gk20a *g = dev->g;
175 void *buf;
176 int err;
177
178 if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
179 return -EBUSY;
180
181 err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
182 if (err)
183 return err;
184
185
186 dev->hdr = buf;
187 dev->ents = (struct nvgpu_gpu_ctxsw_trace_entry *) (dev->hdr + 1);
188 dev->size = size;
189 dev->num_ents = dev->hdr->num_ents;
190
191 nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
192 dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
193 return 0;
194}
195
196int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
197 void **buf, size_t *size)
198{
199 struct nvgpu_ctxsw_ring_header *hdr;
200
201 *size = roundup(*size, PAGE_SIZE);
202 hdr = vmalloc_user(*size);
203 if (!hdr)
204 return -ENOMEM;
205
206 hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
207 hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
208 hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
209 / sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
210 hdr->ent_size = sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
211 hdr->drop_count = 0;
212 hdr->read_idx = 0;
213 hdr->write_idx = 0;
214 hdr->write_seqno = 0;
215
216 *buf = hdr;
217 return 0;
218}
219
220int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
221{
222 struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
223
224 nvgpu_vfree(g, dev->hdr);
225 return 0;
226}
227
228static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
229 struct nvgpu_ctxsw_ring_setup_args *args)
230{
231 struct gk20a *g = dev->g;
232 size_t size = args->size;
233 int ret;
234
235 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
236
237 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
238 return -EINVAL;
239
240 nvgpu_mutex_acquire(&dev->write_lock);
241 ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
242 nvgpu_mutex_release(&dev->write_lock);
243
244 return ret;
245}
246
247static void nvgpu_set_ctxsw_trace_filter_args(struct nvgpu_gpu_ctxsw_trace_filter *filter_dst,
248 struct nvgpu_ctxsw_trace_filter *filter_src)
249{
250 memcpy(filter_dst->tag_bits, filter_src->tag_bits, (NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
251}
252
253static void nvgpu_get_ctxsw_trace_filter_args(struct nvgpu_ctxsw_trace_filter *filter_dst,
254 struct nvgpu_gpu_ctxsw_trace_filter *filter_src)
255{
256 memcpy(filter_dst->tag_bits, filter_src->tag_bits, (NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
257}
258
259static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
260 struct nvgpu_ctxsw_trace_filter_args *args)
261{
262 struct gk20a *g = dev->g;
263
264 nvgpu_mutex_acquire(&dev->write_lock);
265 nvgpu_set_ctxsw_trace_filter_args(&dev->filter, &args->filter);
266 nvgpu_mutex_release(&dev->write_lock);
267
268 if (g->ops.fecs_trace.set_filter)
269 g->ops.fecs_trace.set_filter(g, &dev->filter);
270 return 0;
271}
272
273static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
274 struct nvgpu_ctxsw_trace_filter_args *args)
275{
276 nvgpu_mutex_acquire(&dev->write_lock);
277 nvgpu_get_ctxsw_trace_filter_args(&args->filter, &dev->filter);
278 nvgpu_mutex_release(&dev->write_lock);
279
280 return 0;
281}
282
283static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
284{
285 struct gk20a *g = dev->g;
286 int err;
287
288 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
289
290 err = gk20a_busy(g);
291 if (err)
292 return err;
293
294 if (g->ops.fecs_trace.flush)
295 err = g->ops.fecs_trace.flush(g);
296
297 if (likely(!err))
298 err = g->ops.fecs_trace.poll(g);
299
300 gk20a_idle(g);
301 return err;
302}
303
304int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
305{
306 struct nvgpu_os_linux *l;
307 struct gk20a *g;
308 struct gk20a_ctxsw_trace *trace;
309 struct gk20a_ctxsw_dev *dev;
310 int err;
311 size_t size;
312 u32 n;
313
314 /* only one VM for now */
315 const int vmid = 0;
316
317 l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
318 g = gk20a_get(&l->g);
319 if (!g)
320 return -ENODEV;
321
322 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
323
324 err = gk20a_busy(g);
325 if (err)
326 goto free_ref;
327
328 trace = g->ctxsw_trace;
329 if (!trace) {
330 err = -ENODEV;
331 goto idle;
332 }
333
334 /* Allow only one user for this device */
335 dev = &trace->devs[vmid];
336 nvgpu_mutex_acquire(&dev->write_lock);
337 if (dev->hdr) {
338 err = -EBUSY;
339 goto done;
340 }
341
342 /* By default, allocate ring buffer big enough to accommodate
343 * FECS records with default event filter */
344
345 /* enable all traces by default */
346 NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
347
348 /* compute max number of entries generated with this filter */
349 n = g->ops.fecs_trace.max_entries(g, &dev->filter);
350
351 size = sizeof(struct nvgpu_ctxsw_ring_header) +
352 n * sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
353 nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
354 size, n, sizeof(struct nvgpu_gpu_ctxsw_trace_entry));
355
356 err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
357 if (!err) {
358 filp->private_data = dev;
359 nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
360 filp, dev, size);
361 }
362
363done:
364 nvgpu_mutex_release(&dev->write_lock);
365
366idle:
367 gk20a_idle(g);
368free_ref:
369 if (err)
370 gk20a_put(g);
371 return err;
372}
373
374int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
375{
376 struct gk20a_ctxsw_dev *dev = filp->private_data;
377 struct gk20a *g = dev->g;
378
379 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
380
381 g->ops.fecs_trace.disable(g);
382
383 nvgpu_mutex_acquire(&dev->write_lock);
384 dev->write_enabled = false;
385 nvgpu_mutex_release(&dev->write_lock);
386
387 if (dev->hdr) {
388 dev->g->ops.fecs_trace.free_user_buffer(dev->g);
389 dev->hdr = NULL;
390 }
391 gk20a_put(g);
392 return 0;
393}
394
395long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
396 unsigned long arg)
397{
398 struct gk20a_ctxsw_dev *dev = filp->private_data;
399 struct gk20a *g = dev->g;
400 u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
401 int err = 0;
402
403 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
404
405 if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
406 (_IOC_NR(cmd) == 0) ||
407 (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
408 (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
409 return -EINVAL;
410
411 memset(buf, 0, sizeof(buf));
412 if (_IOC_DIR(cmd) & _IOC_WRITE) {
413 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
414 return -EFAULT;
415 }
416
417 switch (cmd) {
418 case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
419 err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
420 break;
421 case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
422 err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
423 break;
424 case NVGPU_CTXSW_IOCTL_RING_SETUP:
425 err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
426 (struct nvgpu_ctxsw_ring_setup_args *) buf);
427 break;
428 case NVGPU_CTXSW_IOCTL_SET_FILTER:
429 err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
430 (struct nvgpu_ctxsw_trace_filter_args *) buf);
431 break;
432 case NVGPU_CTXSW_IOCTL_GET_FILTER:
433 err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
434 (struct nvgpu_ctxsw_trace_filter_args *) buf);
435 break;
436 case NVGPU_CTXSW_IOCTL_POLL:
437 err = gk20a_ctxsw_dev_ioctl_poll(dev);
438 break;
439 default:
440 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
441 cmd);
442 err = -ENOTTY;
443 }
444
445 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
446 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
447
448 return err;
449}
450
451unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
452{
453 struct gk20a_ctxsw_dev *dev = filp->private_data;
454 struct gk20a *g = dev->g;
455 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
456 unsigned int mask = 0;
457
458 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
459
460 nvgpu_mutex_acquire(&dev->write_lock);
461 poll_wait(filp, &dev->readout_wq.wq, wait);
462 if (!ring_is_empty(hdr))
463 mask |= POLLIN | POLLRDNORM;
464 nvgpu_mutex_release(&dev->write_lock);
465
466 return mask;
467}
468
469static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
470{
471 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
472 struct gk20a *g = dev->g;
473
474 nvgpu_atomic_inc(&dev->vma_ref);
475 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
476 nvgpu_atomic_read(&dev->vma_ref));
477}
478
479static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
480{
481 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
482 struct gk20a *g = dev->g;
483
484 nvgpu_atomic_dec(&dev->vma_ref);
485 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
486 nvgpu_atomic_read(&dev->vma_ref));
487}
488
489static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
490 .open = gk20a_ctxsw_dev_vma_open,
491 .close = gk20a_ctxsw_dev_vma_close,
492};
493
494int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
495 struct vm_area_struct *vma)
496{
497 return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
498}
499
500int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
501{
502 struct gk20a_ctxsw_dev *dev = filp->private_data;
503 struct gk20a *g = dev->g;
504 int ret;
505
506 nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
507 vma->vm_start, vma->vm_end);
508
509 ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
510 if (likely(!ret)) {
511 vma->vm_private_data = dev;
512 vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
513 vma->vm_ops->open(vma);
514 }
515
516 return ret;
517}
518
519#ifdef CONFIG_GK20A_CTXSW_TRACE
520static int gk20a_ctxsw_init_devs(struct gk20a *g)
521{
522 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
523 struct gk20a_ctxsw_dev *dev = trace->devs;
524 int err;
525 int i;
526
527 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
528 dev->g = g;
529 dev->hdr = NULL;
530 dev->write_enabled = false;
531 nvgpu_cond_init(&dev->readout_wq);
532 err = nvgpu_mutex_init(&dev->write_lock);
533 if (err)
534 return err;
535 nvgpu_atomic_set(&dev->vma_ref, 0);
536 dev++;
537 }
538 return 0;
539}
540#endif
541
/*
 * Set up context-switch tracing for @g.
 *
 * Does nothing when tracing is compiled out or the chip provides no
 * fecs_trace.init hook. If the trace state already exists, only the
 * NVGPU_SUPPORT_FECS_CTXSW_TRACE flag is (re)asserted. Otherwise the trace
 * container and its devices are created and the backend is initialized; if
 * that fails, the fecs_trace ops are wiped and the container is freed.
 *
 * Returns 0, -ENOMEM, or the error from device/backend initialization.
 */
int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* if tracing is not supported, skip this */
	if (!g->ops.fecs_trace.init)
		return 0;

	if (likely(trace)) {
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
		return 0;
	}

	trace = nvgpu_kzalloc(g, sizeof(*trace));
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (!err)
		err = g->ops.fecs_trace.init(g);
	if (likely(!err))
		return 0;

	/* Clearing the ops makes later calls see tracing as unsupported. */
	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
	nvgpu_kfree(g, trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}
583
/*
 * Tear down context-switch tracing for @g: destroy the per-device locks, free
 * the trace container, then deinitialize the FECS trace backend. A no-op when
 * tracing is compiled out or was never set up.
 *
 * NOTE(review): the container is freed before the backend's deinit hook runs;
 * whether the backend can still reach it at that point is not visible here.
 */
void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int idx;

	if (!trace)
		return;

	for (idx = 0; idx < GK20A_CTXSW_TRACE_NUM_DEVS; idx++)
		nvgpu_mutex_destroy(&trace->devs[idx].write_lock);

	nvgpu_kfree(g, trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}
608
609int gk20a_ctxsw_trace_write(struct gk20a *g,
610 struct nvgpu_gpu_ctxsw_trace_entry *entry)
611{
612 struct nvgpu_ctxsw_ring_header *hdr;
613 struct gk20a_ctxsw_dev *dev;
614 int ret = 0;
615 const char *reason;
616 u32 write_idx;
617
618 if (!g->ctxsw_trace)
619 return 0;
620
621 if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
622 return -ENODEV;
623
624 dev = &g->ctxsw_trace->devs[entry->vmid];
625 hdr = dev->hdr;
626
627 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
628 "dev=%p hdr=%p", dev, hdr);
629
630 nvgpu_mutex_acquire(&dev->write_lock);
631
632 if (unlikely(!hdr)) {
633 /* device has been released */
634 ret = -ENODEV;
635 goto done;
636 }
637
638 write_idx = hdr->write_idx;
639 if (write_idx >= dev->num_ents) {
640 nvgpu_err(dev->g,
641 "write_idx=%u out of range [0..%u]",
642 write_idx, dev->num_ents);
643 ret = -ENOSPC;
644 reason = "write_idx out of range";
645 goto disable;
646 }
647
648 entry->seqno = hdr->write_seqno++;
649
650 if (!dev->write_enabled) {
651 ret = -EBUSY;
652 reason = "write disabled";
653 goto drop;
654 }
655
656 if (unlikely(ring_is_full(hdr))) {
657 ret = -ENOSPC;
658 reason = "user fifo full";
659 goto drop;
660 }
661
662 if (!NVGPU_GPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
663 reason = "filtered out";
664 goto filter;
665 }
666
667 nvgpu_log(g, gpu_dbg_ctxsw,
668 "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
669 entry->seqno, entry->context_id, entry->pid,
670 entry->tag, entry->timestamp);
671
672 dev->ents[write_idx] = *entry;
673
674 /* ensure record is written before updating write index */
675 nvgpu_smp_wmb();
676
677 write_idx++;
678 if (unlikely(write_idx >= hdr->num_ents))
679 write_idx = 0;
680 hdr->write_idx = write_idx;
681 nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
682 hdr->read_idx, hdr->write_idx, ring_len(hdr));
683
684 nvgpu_mutex_release(&dev->write_lock);
685 return ret;
686
687disable:
688 g->ops.fecs_trace.disable(g);
689
690drop:
691 hdr->drop_count++;
692
693filter:
694 nvgpu_log(g, gpu_dbg_ctxsw,
695 "dropping seqno=%d context_id=%08x pid=%lld "
696 "tag=%x time=%llx (%s)",
697 entry->seqno, entry->context_id, entry->pid,
698 entry->tag, entry->timestamp, reason);
699
700done:
701 nvgpu_mutex_release(&dev->write_lock);
702 return ret;
703}
704
705void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
706{
707 struct gk20a_ctxsw_dev *dev;
708
709 if (!g->ctxsw_trace)
710 return;
711
712 dev = &g->ctxsw_trace->devs[vmid];
713 nvgpu_cond_signal_interruptible(&dev->readout_wq);
714}
715
716void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
717{
718#ifdef CONFIG_GK20A_CTXSW_TRACE
719 struct nvgpu_gpu_ctxsw_trace_entry entry = {
720 .vmid = 0,
721 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
722 .context_id = 0,
723 .pid = ch->tgid,
724 };
725
726 if (!g->ctxsw_trace)
727 return;
728
729 g->ops.ptimer.read_ptimer(g, &entry.timestamp);
730 gk20a_ctxsw_trace_write(g, &entry);
731 gk20a_ctxsw_trace_wake_up(g, 0);
732#endif
733 trace_gk20a_channel_reset(ch->chid, ch->tsgid);
734}
735
736void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
737{
738#ifdef CONFIG_GK20A_CTXSW_TRACE
739 struct nvgpu_gpu_ctxsw_trace_entry entry = {
740 .vmid = 0,
741 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
742 .context_id = 0,
743 .pid = tsg->tgid,
744 };
745
746 if (!g->ctxsw_trace)
747 return;
748
749 g->ops.ptimer.read_ptimer(g, &entry.timestamp);
750 gk20a_ctxsw_trace_write(g, &entry);
751 gk20a_ctxsw_trace_wake_up(g, 0);
752#endif
753 trace_gk20a_channel_reset(~0, tsg->tsgid);
754}
755
756/*
757 * Convert linux nvgpu ctxsw tags type of the form of NVGPU_CTXSW_TAG_*
758 * into common nvgpu ctxsw tags type of the form of NVGPU_GPU_CTXSW_TAG_*
759 */
760
761u8 nvgpu_gpu_ctxsw_tags_to_common_tags(u8 tags)
762{
763 switch (tags){
764 case NVGPU_CTXSW_TAG_SOF:
765 return NVGPU_GPU_CTXSW_TAG_SOF;
766 case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
767 return NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST;
768 case NVGPU_CTXSW_TAG_FE_ACK:
769 return NVGPU_GPU_CTXSW_TAG_FE_ACK;
770 case NVGPU_CTXSW_TAG_FE_ACK_WFI:
771 return NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI;
772 case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
773 return NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP;
774 case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
775 return NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP;
776 case NVGPU_CTXSW_TAG_FE_ACK_CILP:
777 return NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP;
778 case NVGPU_CTXSW_TAG_SAVE_END:
779 return NVGPU_GPU_CTXSW_TAG_SAVE_END;
780 case NVGPU_CTXSW_TAG_RESTORE_START:
781 return NVGPU_GPU_CTXSW_TAG_RESTORE_START;
782 case NVGPU_CTXSW_TAG_CONTEXT_START:
783 return NVGPU_GPU_CTXSW_TAG_CONTEXT_START;
784 case NVGPU_CTXSW_TAG_ENGINE_RESET:
785 return NVGPU_GPU_CTXSW_TAG_ENGINE_RESET;
786 case NVGPU_CTXSW_TAG_INVALID_TIMESTAMP:
787 return NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP;
788 }
789
790 WARN_ON(1);
791 return tags;
792}