author    Alex Waterman <alexw@nvidia.com>    2017-10-25 12:56:09 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>    2017-10-29 14:02:15 -0400
commit    4d2d890c01b94d10ad55643a4c2c159a98419efe (patch)
tree      1043316e5e0dc163da79ae17c089b7818dc54d4b /drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
parent    a681c505c96dba58231ac050e4c8f4f81d79540f (diff)
gpu: nvgpu: Move ctxsw_trace_gk20a.c to common/linux
Migrate ctxsw_trace_gk20a.c to common/linux/ctxsw_trace.c. This has been
done because the ctxsw tracing code is currently too tightly tied to the
Linux OS due to its usage of a couple of system calls:

- poll()
- mmap()

and general Linux driver framework code. As a result, pulling the logic
out of the FECS tracing code is simply too large a scope for the time
being. Instead the code was just copied over as directly as possible.

The HAL ops for the FECS code were hidden behind the FECS tracing config
so that vm_area_struct is not used when QNX does not define said config.
All other non-HAL functions called by the FECS ctxsw tracing code have
now also been hidden by this config.

This is not pretty, but for the time being it seems like the way to go.

JIRA NVGPU-287

Change-Id: Ib880ab237f4abd330dc66998692c86c4507149c2
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586547
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
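For reference, the Linux-only userspace contract that makes this code hard to
port (a RING_SETUP ioctl, TRACE_ENABLE, then poll()/read() on the same fd) can
be exercised roughly as follows. This is a minimal sketch, not part of the
commit: the device node name is hypothetical (the cdev is registered elsewhere
in the driver), the uapi header is assumed to be installed as <linux/nvgpu.h>,
and the open handler checks CAP_SYS_ADMIN, so this must run as root.

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* NVGPU_CTXSW_IOCTL_*, ring/entry layouts */

int main(void)
{
	struct nvgpu_ctxsw_ring_setup_args setup;
	struct nvgpu_ctxsw_trace_entry ent;
	struct pollfd pfd;
	int fd;

	/* hypothetical node name; the cdev registration lives elsewhere */
	fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY | O_NONBLOCK);
	if (fd < 0)
		return 1;

	memset(&setup, 0, sizeof(setup));
	setup.size = 64 * 4096;	/* capped at 128*PAGE_SIZE by the driver */
	if (ioctl(fd, NVGPU_CTXSW_IOCTL_RING_SETUP, &setup) ||
	    ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE))
		return 1;

	pfd.fd = fd;
	pfd.events = POLLIN;
	while (poll(&pfd, 1, -1) > 0) {
		/* read() copies whole entries and advances read_idx */
		while (read(fd, &ent, sizeof(ent)) == (ssize_t) sizeof(ent))
			printf("seqno=%u tag=%x pid=%lld ts=%llx\n",
			       (unsigned) ent.seqno, (unsigned) ent.tag,
			       (long long) ent.pid,
			       (unsigned long long) ent.timestamp);
	}

	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_DISABLE);
	close(fd);
	return 0;
}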
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c')
-rw-r--r--    drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c    727
1 file changed, 0 insertions(+), 727 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
deleted file mode 100644
index fb33de23..00000000
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ /dev/null
@@ -1,727 +0,0 @@
/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <asm/barrier.h>
#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h>

#include <nvgpu/kmem.h>

#include "ctxsw_trace_gk20a.h"
#include "gk20a.h"
#include "platform_gk20a.h"
#include "gr_gk20a.h"
#include "common/linux/os_linux.h"

#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/barrier.h>

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE	(128*PAGE_SIZE)

/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;

	struct nvgpu_ctxsw_ring_header *hdr;
	struct nvgpu_ctxsw_trace_entry *ents;
	struct nvgpu_ctxsw_trace_filter filter;
	bool write_enabled;
	struct nvgpu_cond readout_wq;
	size_t size;
	u32 num_ents;

	nvgpu_atomic_t vma_ref;

	struct nvgpu_mutex write_lock;
};

struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};

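/*
 * Ring index semantics: read_idx is advanced only by the reader
 * (gk20a_ctxsw_dev_read) and write_idx only by the writer
 * (gk20a_ctxsw_trace_write); both updates happen under write_lock.
 * ring_is_full() deliberately leaves one slot unused so that a full
 * ring can be distinguished from an empty one.
 */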
static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx == hdr->read_idx);
}

static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}

static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}

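/*
 * Blocking read of trace entries: unless O_NONBLOCK is set, sleep until
 * the ring has data, then copy out as many whole entries as fit in the
 * user buffer, advancing read_idx under write_lock.
 */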
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry *) buf;
	size_t copied = 0;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	nvgpu_mutex_acquire(&dev->write_lock);
	while (ring_is_empty(hdr)) {
		nvgpu_mutex_release(&dev->write_lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
			!ring_is_empty(hdr), 0);
		if (err)
			return err;
		nvgpu_mutex_acquire(&dev->write_lock);
	}

	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
			sizeof(*entry))) {
			nvgpu_mutex_release(&dev->write_lock);
			return -EFAULT;
		}

		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	*off = hdr->read_idx;
	nvgpu_mutex_release(&dev->write_lock);

	return copied;
}

static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = true;
	nvgpu_mutex_release(&dev->write_lock);
	dev->g->ops.fecs_trace.enable(dev->g);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
	dev->g->ops.fecs_trace.disable(dev->g);
	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);
	return 0;
}

static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
	size_t size)
{
	struct gk20a *g = dev->g;
	void *buf;
	int err;

	if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
		return -EBUSY;

	err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
	if (err)
		return err;

	dev->hdr = buf;
	dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
	dev->size = size;
	dev->num_ents = dev->hdr->num_ents;

	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
		dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
	return 0;
}

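/*
 * vmalloc_user() returns zeroed, page-aligned memory that can later be
 * mapped into userspace with remap_vmalloc_range() (see
 * gk20a_ctxsw_dev_mmap_buffer() below). The ring header is initialized
 * in place and entries follow it immediately.
 */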
int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
	void **buf, size_t *size)
{
	struct nvgpu_ctxsw_ring_header *hdr;

	*size = roundup(*size, PAGE_SIZE);
	hdr = vmalloc_user(*size);
	if (!hdr)
		return -ENOMEM;

	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
	hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
		/ sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->drop_count = 0;
	hdr->read_idx = 0;
	hdr->write_idx = 0;
	hdr->write_seqno = 0;

	*buf = hdr;
	return 0;
}

int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
{
	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];

	nvgpu_vfree(g, dev->hdr);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_ring_setup_args *args)
{
	size_t size = args->size;
	int ret;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);

	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
		return -EINVAL;

	nvgpu_mutex_acquire(&dev->write_lock);
	ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	nvgpu_mutex_release(&dev->write_lock);

	return ret;
}

static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	struct gk20a *g = dev->g;

	nvgpu_mutex_acquire(&dev->write_lock);
	dev->filter = args->filter;
	nvgpu_mutex_release(&dev->write_lock);

	if (g->ops.fecs_trace.set_filter)
		g->ops.fecs_trace.set_filter(g, &dev->filter);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	nvgpu_mutex_acquire(&dev->write_lock);
	args->filter = dev->filter;
	nvgpu_mutex_release(&dev->write_lock);

	return 0;
}

static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	err = gk20a_busy(g);
	if (err)
		return err;

	if (g->ops.fecs_trace.flush)
		err = g->ops.fecs_trace.flush(g);

	if (likely(!err))
		err = g->ops.fecs_trace.poll(g);

	gk20a_idle(g);
	return err;
}

int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l;
	struct gk20a *g;
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int err;
	size_t size;
	u32 n;

	/* only one VM for now */
	const int vmid = 0;

	l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
	g = gk20a_get(&l->g);
	if (!g)
		return -ENODEV;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);

	if (!capable(CAP_SYS_ADMIN)) {
		err = -EPERM;
		goto free_ref;
	}

	err = gk20a_busy(g);
	if (err)
		goto free_ref;

	trace = g->ctxsw_trace;
	if (!trace) {
		err = -ENODEV;
		goto idle;
	}

	/* Allow only one user for this device */
	dev = &trace->devs[vmid];
	nvgpu_mutex_acquire(&dev->write_lock);
	if (dev->hdr) {
		err = -EBUSY;
		goto done;
	}

	/* By default, allocate ring buffer big enough to accommodate
	 * FECS records with default event filter */

	/* enable all traces by default */
	NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);

	/* compute max number of entries generated with this filter */
	n = g->ops.fecs_trace.max_entries(g, &dev->filter);

	size = sizeof(struct nvgpu_ctxsw_ring_header) +
		n * sizeof(struct nvgpu_ctxsw_trace_entry);
	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
		size, n, sizeof(struct nvgpu_ctxsw_trace_entry));

	err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	if (!err) {
		filp->private_data = dev;
		gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
			filp, dev, size);
	}

done:
	nvgpu_mutex_release(&dev->write_lock);

idle:
	gk20a_idle(g);
free_ref:
	if (err)
		gk20a_put(g);
	return err;
}

int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);

	g->ops.fecs_trace.disable(g);

	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);

	if (dev->hdr) {
		dev->g->ops.fecs_trace.free_user_buffer(dev->g);
		dev->hdr = NULL;
	}
	gk20a_put(g);
	return 0;
}

long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
	unsigned long arg)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	switch (cmd) {
	case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_RING_SETUP:
		err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
			(struct nvgpu_ctxsw_ring_setup_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_SET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_GET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_POLL:
		err = gk20a_ctxsw_dev_ioctl_poll(dev);
		break;
	default:
		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
			cmd);
		err = -ENOTTY;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));

	return err;
}

unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	unsigned int mask = 0;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	nvgpu_mutex_acquire(&dev->write_lock);
	poll_wait(filp, &dev->readout_wq.wq, wait);
	if (!ring_is_empty(hdr))
		mask |= POLLIN | POLLRDNORM;
	nvgpu_mutex_release(&dev->write_lock);

	return mask;
}

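/*
 * The vma open/close hooks reference-count live userspace mappings of
 * the ring buffer; gk20a_ctxsw_dev_alloc_buffer() refuses to replace
 * the buffer while vma_ref is non-zero.
 */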
static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;

	nvgpu_atomic_inc(&dev->vma_ref);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;

	nvgpu_atomic_dec(&dev->vma_ref);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	.open = gk20a_ctxsw_dev_vma_open,
	.close = gk20a_ctxsw_dev_vma_close,
};

int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
	struct vm_area_struct *vma)
{
	return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
}

int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	int ret;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
		vma->vm_start, vma->vm_end);

	ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
	if (likely(!ret)) {
		vma->vm_private_data = dev;
		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
		vma->vm_ops->open(vma);
	}

	return ret;
}

#ifdef CONFIG_GK20A_CTXSW_TRACE
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	struct gk20a_ctxsw_dev *dev = trace->devs;
	int err;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		nvgpu_cond_init(&dev->readout_wq);
		err = nvgpu_mutex_init(&dev->write_lock);
		if (err)
			return err;
		nvgpu_atomic_set(&dev->vma_ref, 0);
		dev++;
	}
	return 0;
}
#endif

int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* if tracing is not supported, skip this */
	if (!g->ops.fecs_trace.init)
		return 0;

	if (likely(trace))
		return 0;

	trace = nvgpu_kzalloc(g, sizeof(*trace));
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
	nvgpu_kfree(g, trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}

void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int i;

	if (!g->ctxsw_trace)
		return;

	trace = g->ctxsw_trace;
	dev = trace->devs;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		nvgpu_mutex_destroy(&dev->write_lock);
		dev++;
	}

	nvgpu_kfree(g, g->ctxsw_trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}

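/*
 * Producer path: publish one entry into the per-VM ring. The record is
 * stored before write_idx is advanced, with nvgpu_smp_wmb() ordering
 * the two stores so a consumer mapping the ring never sees the updated
 * index before the entry data itself.
 */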
int gk20a_ctxsw_trace_write(struct gk20a *g,
	struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;
	u32 write_idx;

	if (!g->ctxsw_trace)
		return 0;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	nvgpu_mutex_acquire(&dev->write_lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	write_idx = hdr->write_idx;
	if (write_idx >= dev->num_ents) {
		nvgpu_err(dev->g,
			"write_idx=%u out of range [0..%u]",
			write_idx, dev->num_ents);
		ret = -ENOSPC;
		reason = "write_idx out of range";
		goto disable;
	}

	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	gk20a_dbg(gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[write_idx] = *entry;

	/* ensure record is written before updating write index */
	nvgpu_smp_wmb();

	write_idx++;
	if (unlikely(write_idx >= hdr->num_ents))
		write_idx = 0;
	hdr->write_idx = write_idx;
	gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	nvgpu_mutex_release(&dev->write_lock);
	return ret;

disable:
	g->ops.fecs_trace.disable(g);

drop:
	hdr->drop_count++;

filter:
	gk20a_dbg(gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	nvgpu_mutex_release(&dev->write_lock);
	return ret;
}

void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
{
	struct gk20a_ctxsw_dev *dev;

	if (!g->ctxsw_trace)
		return;

	dev = &g->ctxsw_trace->devs[vmid];
	nvgpu_cond_signal_interruptible(&dev->readout_wq);
}

void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = ch->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.bus.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(ch->chid, ch->tsgid);
}

void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = tsg->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.bus.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(~0, tsg->tsgid);
}