Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c     |   2
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 727
 drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h |  18
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c        |  27
 drivers/gpu/nvgpu/gk20a/gk20a.c             |   2
 drivers/gpu/nvgpu/gk20a/gk20a.h             |  14
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c          |   5
 7 files changed, 48 insertions(+), 747 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0d011b06..546f4164 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -553,8 +553,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
 		timeout);
 
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
 		g->ops.fecs_trace.unbind_channel(g, ch);
+#endif
 
 	/* release channel ctx */
 	g->ops.gr.free_channel_ctx(ch, was_tsg);
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
deleted file mode 100644
index fb33de23..00000000
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ /dev/null
@@ -1,727 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <asm/barrier.h>
24#include <linux/wait.h>
25#include <linux/ktime.h>
26#include <linux/uaccess.h>
27#include <linux/poll.h>
28#include <trace/events/gk20a.h>
29#include <uapi/linux/nvgpu.h>
30
31#include <nvgpu/kmem.h>
32
33#include "ctxsw_trace_gk20a.h"
34#include "gk20a.h"
35#include "platform_gk20a.h"
36#include "gr_gk20a.h"
37#include "common/linux/os_linux.h"
38
39#include <nvgpu/log.h>
40#include <nvgpu/atomic.h>
41#include <nvgpu/barrier.h>
42
43#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
44#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
45
46#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
47
48/* Userland-facing FIFO (one global + eventually one per VM) */
49struct gk20a_ctxsw_dev {
50 struct gk20a *g;
51
52 struct nvgpu_ctxsw_ring_header *hdr;
53 struct nvgpu_ctxsw_trace_entry *ents;
54 struct nvgpu_ctxsw_trace_filter filter;
55 bool write_enabled;
56 struct nvgpu_cond readout_wq;
57 size_t size;
58 u32 num_ents;
59
60 nvgpu_atomic_t vma_ref;
61
62 struct nvgpu_mutex write_lock;
63};
64
65
66struct gk20a_ctxsw_trace {
67 struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
68};
69
70static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
71{
72 return (hdr->write_idx == hdr->read_idx);
73}
74
75static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
76{
77 return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
78}
79
80static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
81{
82 return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
83}
84
85ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
86 loff_t *off)
87{
88 struct gk20a_ctxsw_dev *dev = filp->private_data;
89 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
90 struct nvgpu_ctxsw_trace_entry __user *entry =
91 (struct nvgpu_ctxsw_trace_entry *) buf;
92 size_t copied = 0;
93 int err;
94
95 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
96 "filp=%p buf=%p size=%zu", filp, buf, size);
97
98 nvgpu_mutex_acquire(&dev->write_lock);
99 while (ring_is_empty(hdr)) {
100 nvgpu_mutex_release(&dev->write_lock);
101 if (filp->f_flags & O_NONBLOCK)
102 return -EAGAIN;
103 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
104 !ring_is_empty(hdr), 0);
105 if (err)
106 return err;
107 nvgpu_mutex_acquire(&dev->write_lock);
108 }
109
110 while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
111 if (ring_is_empty(hdr))
112 break;
113
114 if (copy_to_user(entry, &dev->ents[hdr->read_idx],
115 sizeof(*entry))) {
116 nvgpu_mutex_release(&dev->write_lock);
117 return -EFAULT;
118 }
119
120 hdr->read_idx++;
121 if (hdr->read_idx >= hdr->num_ents)
122 hdr->read_idx = 0;
123
124 entry++;
125 copied += sizeof(*entry);
126 size -= sizeof(*entry);
127 }
128
129 gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
130 hdr->read_idx);
131
132 *off = hdr->read_idx;
133 nvgpu_mutex_release(&dev->write_lock);
134
135 return copied;
136}
137
138static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
139{
140 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
141 nvgpu_mutex_acquire(&dev->write_lock);
142 dev->write_enabled = true;
143 nvgpu_mutex_release(&dev->write_lock);
144 dev->g->ops.fecs_trace.enable(dev->g);
145 return 0;
146}
147
148static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
149{
150 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
151 dev->g->ops.fecs_trace.disable(dev->g);
152 nvgpu_mutex_acquire(&dev->write_lock);
153 dev->write_enabled = false;
154 nvgpu_mutex_release(&dev->write_lock);
155 return 0;
156}
157
158static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
159 size_t size)
160{
161 struct gk20a *g = dev->g;
162 void *buf;
163 int err;
164
165 if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
166 return -EBUSY;
167
168 err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
169 if (err)
170 return err;
171
172
173 dev->hdr = buf;
174 dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
175 dev->size = size;
176 dev->num_ents = dev->hdr->num_ents;
177
178 gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
179 dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
180 return 0;
181}
182
183int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
184 void **buf, size_t *size)
185{
186 struct nvgpu_ctxsw_ring_header *hdr;
187
188 *size = roundup(*size, PAGE_SIZE);
189 hdr = vmalloc_user(*size);
190 if (!hdr)
191 return -ENOMEM;
192
193 hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
194 hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
195 hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
196 / sizeof(struct nvgpu_ctxsw_trace_entry);
197 hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
198 hdr->drop_count = 0;
199 hdr->read_idx = 0;
200 hdr->write_idx = 0;
201 hdr->write_seqno = 0;
202
203 *buf = hdr;
204 return 0;
205}
206
207int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
208{
209 struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
210
211 nvgpu_vfree(g, dev->hdr);
212 return 0;
213}
214
215static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
216 struct nvgpu_ctxsw_ring_setup_args *args)
217{
218 size_t size = args->size;
219 int ret;
220
221 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
222
223 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
224 return -EINVAL;
225
226 nvgpu_mutex_acquire(&dev->write_lock);
227 ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
228 nvgpu_mutex_release(&dev->write_lock);
229
230 return ret;
231}
232
233static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
234 struct nvgpu_ctxsw_trace_filter_args *args)
235{
236 struct gk20a *g = dev->g;
237
238 nvgpu_mutex_acquire(&dev->write_lock);
239 dev->filter = args->filter;
240 nvgpu_mutex_release(&dev->write_lock);
241
242 if (g->ops.fecs_trace.set_filter)
243 g->ops.fecs_trace.set_filter(g, &dev->filter);
244 return 0;
245}
246
247static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
248 struct nvgpu_ctxsw_trace_filter_args *args)
249{
250 nvgpu_mutex_acquire(&dev->write_lock);
251 args->filter = dev->filter;
252 nvgpu_mutex_release(&dev->write_lock);
253
254 return 0;
255}
256
257static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
258{
259 struct gk20a *g = dev->g;
260 int err;
261
262 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
263
264 err = gk20a_busy(g);
265 if (err)
266 return err;
267
268 if (g->ops.fecs_trace.flush)
269 err = g->ops.fecs_trace.flush(g);
270
271 if (likely(!err))
272 err = g->ops.fecs_trace.poll(g);
273
274 gk20a_idle(g);
275 return err;
276}
277
278int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
279{
280 struct nvgpu_os_linux *l;
281 struct gk20a *g;
282 struct gk20a_ctxsw_trace *trace;
283 struct gk20a_ctxsw_dev *dev;
284 int err;
285 size_t size;
286 u32 n;
287
288 /* only one VM for now */
289 const int vmid = 0;
290
291 l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
292 g = gk20a_get(&l->g);
293 if (!g)
294 return -ENODEV;
295
296 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
297
298 if (!capable(CAP_SYS_ADMIN)) {
299 err = -EPERM;
300 goto free_ref;
301 }
302
303 err = gk20a_busy(g);
304 if (err)
305 goto free_ref;
306
307 trace = g->ctxsw_trace;
308 if (!trace) {
309 err = -ENODEV;
310 goto idle;
311 }
312
313 /* Allow only one user for this device */
314 dev = &trace->devs[vmid];
315 nvgpu_mutex_acquire(&dev->write_lock);
316 if (dev->hdr) {
317 err = -EBUSY;
318 goto done;
319 }
320
321 /* By default, allocate ring buffer big enough to accommodate
322 * FECS records with default event filter */
323
324 /* enable all traces by default */
325 NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
326
327 /* compute max number of entries generated with this filter */
328 n = g->ops.fecs_trace.max_entries(g, &dev->filter);
329
330 size = sizeof(struct nvgpu_ctxsw_ring_header) +
331 n * sizeof(struct nvgpu_ctxsw_trace_entry);
332 gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
333 size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
334
335 err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
336 if (!err) {
337 filp->private_data = dev;
338 gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
339 filp, dev, size);
340 }
341
342done:
343 nvgpu_mutex_release(&dev->write_lock);
344
345idle:
346 gk20a_idle(g);
347free_ref:
348 if (err)
349 gk20a_put(g);
350 return err;
351}
352
353int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
354{
355 struct gk20a_ctxsw_dev *dev = filp->private_data;
356 struct gk20a *g = dev->g;
357
358 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
359
360 g->ops.fecs_trace.disable(g);
361
362 nvgpu_mutex_acquire(&dev->write_lock);
363 dev->write_enabled = false;
364 nvgpu_mutex_release(&dev->write_lock);
365
366 if (dev->hdr) {
367 dev->g->ops.fecs_trace.free_user_buffer(dev->g);
368 dev->hdr = NULL;
369 }
370 gk20a_put(g);
371 return 0;
372}
373
374long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
375 unsigned long arg)
376{
377 struct gk20a_ctxsw_dev *dev = filp->private_data;
378 struct gk20a *g = dev->g;
379 u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
380 int err = 0;
381
382 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
383
384 if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
385 (_IOC_NR(cmd) == 0) ||
386 (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
387 (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
388 return -EINVAL;
389
390 memset(buf, 0, sizeof(buf));
391 if (_IOC_DIR(cmd) & _IOC_WRITE) {
392 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
393 return -EFAULT;
394 }
395
396 switch (cmd) {
397 case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
398 err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
399 break;
400 case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
401 err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
402 break;
403 case NVGPU_CTXSW_IOCTL_RING_SETUP:
404 err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
405 (struct nvgpu_ctxsw_ring_setup_args *) buf);
406 break;
407 case NVGPU_CTXSW_IOCTL_SET_FILTER:
408 err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
409 (struct nvgpu_ctxsw_trace_filter_args *) buf);
410 break;
411 case NVGPU_CTXSW_IOCTL_GET_FILTER:
412 err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
413 (struct nvgpu_ctxsw_trace_filter_args *) buf);
414 break;
415 case NVGPU_CTXSW_IOCTL_POLL:
416 err = gk20a_ctxsw_dev_ioctl_poll(dev);
417 break;
418 default:
419 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
420 cmd);
421 err = -ENOTTY;
422 }
423
424 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
425 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
426
427 return err;
428}
429
430unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
431{
432 struct gk20a_ctxsw_dev *dev = filp->private_data;
433 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
434 unsigned int mask = 0;
435
436 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
437
438 nvgpu_mutex_acquire(&dev->write_lock);
439 poll_wait(filp, &dev->readout_wq.wq, wait);
440 if (!ring_is_empty(hdr))
441 mask |= POLLIN | POLLRDNORM;
442 nvgpu_mutex_release(&dev->write_lock);
443
444 return mask;
445}
446
447static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
448{
449 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
450
451 nvgpu_atomic_inc(&dev->vma_ref);
452 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
453 nvgpu_atomic_read(&dev->vma_ref));
454}
455
456static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
457{
458 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
459
460 nvgpu_atomic_dec(&dev->vma_ref);
461 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
462 nvgpu_atomic_read(&dev->vma_ref));
463}
464
465static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
466 .open = gk20a_ctxsw_dev_vma_open,
467 .close = gk20a_ctxsw_dev_vma_close,
468};
469
470int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
471 struct vm_area_struct *vma)
472{
473 return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
474}
475
476int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
477{
478 struct gk20a_ctxsw_dev *dev = filp->private_data;
479 int ret;
480
481 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
482 vma->vm_start, vma->vm_end);
483
484 ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
485 if (likely(!ret)) {
486 vma->vm_private_data = dev;
487 vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
488 vma->vm_ops->open(vma);
489 }
490
491 return ret;
492}
493
494#ifdef CONFIG_GK20A_CTXSW_TRACE
495static int gk20a_ctxsw_init_devs(struct gk20a *g)
496{
497 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
498 struct gk20a_ctxsw_dev *dev = trace->devs;
499 int err;
500 int i;
501
502 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
503 dev->g = g;
504 dev->hdr = NULL;
505 dev->write_enabled = false;
506 nvgpu_cond_init(&dev->readout_wq);
507 err = nvgpu_mutex_init(&dev->write_lock);
508 if (err)
509 return err;
510 nvgpu_atomic_set(&dev->vma_ref, 0);
511 dev++;
512 }
513 return 0;
514}
515#endif
516
517int gk20a_ctxsw_trace_init(struct gk20a *g)
518{
519#ifdef CONFIG_GK20A_CTXSW_TRACE
520 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
521 int err;
522
523 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
524
525 /* if tracing is not supported, skip this */
526 if (!g->ops.fecs_trace.init)
527 return 0;
528
529 if (likely(trace))
530 return 0;
531
532 trace = nvgpu_kzalloc(g, sizeof(*trace));
533 if (unlikely(!trace))
534 return -ENOMEM;
535 g->ctxsw_trace = trace;
536
537 err = gk20a_ctxsw_init_devs(g);
538 if (err)
539 goto fail;
540
541 err = g->ops.fecs_trace.init(g);
542 if (unlikely(err))
543 goto fail;
544
545 return 0;
546
547fail:
548 memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
549 nvgpu_kfree(g, trace);
550 g->ctxsw_trace = NULL;
551 return err;
552#else
553 return 0;
554#endif
555}
556
557void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
558{
559#ifdef CONFIG_GK20A_CTXSW_TRACE
560 struct gk20a_ctxsw_trace *trace;
561 struct gk20a_ctxsw_dev *dev;
562 int i;
563
564 if (!g->ctxsw_trace)
565 return;
566
567 trace = g->ctxsw_trace;
568 dev = trace->devs;
569
570 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
571 nvgpu_mutex_destroy(&dev->write_lock);
572 dev++;
573 }
574
575 nvgpu_kfree(g, g->ctxsw_trace);
576 g->ctxsw_trace = NULL;
577
578 g->ops.fecs_trace.deinit(g);
579#endif
580}
581
582int gk20a_ctxsw_trace_write(struct gk20a *g,
583 struct nvgpu_ctxsw_trace_entry *entry)
584{
585 struct nvgpu_ctxsw_ring_header *hdr;
586 struct gk20a_ctxsw_dev *dev;
587 int ret = 0;
588 const char *reason;
589 u32 write_idx;
590
591 if (!g->ctxsw_trace)
592 return 0;
593
594 if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
595 return -ENODEV;
596
597 dev = &g->ctxsw_trace->devs[entry->vmid];
598 hdr = dev->hdr;
599
600 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
601 "dev=%p hdr=%p", dev, hdr);
602
603 nvgpu_mutex_acquire(&dev->write_lock);
604
605 if (unlikely(!hdr)) {
606 /* device has been released */
607 ret = -ENODEV;
608 goto done;
609 }
610
611 write_idx = hdr->write_idx;
612 if (write_idx >= dev->num_ents) {
613 nvgpu_err(dev->g,
614 "write_idx=%u out of range [0..%u]",
615 write_idx, dev->num_ents);
616 ret = -ENOSPC;
617 reason = "write_idx out of range";
618 goto disable;
619 }
620
621 entry->seqno = hdr->write_seqno++;
622
623 if (!dev->write_enabled) {
624 ret = -EBUSY;
625 reason = "write disabled";
626 goto drop;
627 }
628
629 if (unlikely(ring_is_full(hdr))) {
630 ret = -ENOSPC;
631 reason = "user fifo full";
632 goto drop;
633 }
634
635 if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
636 reason = "filtered out";
637 goto filter;
638 }
639
640 gk20a_dbg(gpu_dbg_ctxsw,
641 "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
642 entry->seqno, entry->context_id, entry->pid,
643 entry->tag, entry->timestamp);
644
645 dev->ents[write_idx] = *entry;
646
647 /* ensure record is written before updating write index */
648 nvgpu_smp_wmb();
649
650 write_idx++;
651 if (unlikely(write_idx >= hdr->num_ents))
652 write_idx = 0;
653 hdr->write_idx = write_idx;
654 gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
655 hdr->read_idx, hdr->write_idx, ring_len(hdr));
656
657 nvgpu_mutex_release(&dev->write_lock);
658 return ret;
659
660disable:
661 g->ops.fecs_trace.disable(g);
662
663drop:
664 hdr->drop_count++;
665
666filter:
667 gk20a_dbg(gpu_dbg_ctxsw,
668 "dropping seqno=%d context_id=%08x pid=%lld "
669 "tag=%x time=%llx (%s)",
670 entry->seqno, entry->context_id, entry->pid,
671 entry->tag, entry->timestamp, reason);
672
673done:
674 nvgpu_mutex_release(&dev->write_lock);
675 return ret;
676}
677
678void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
679{
680 struct gk20a_ctxsw_dev *dev;
681
682 if (!g->ctxsw_trace)
683 return;
684
685 dev = &g->ctxsw_trace->devs[vmid];
686 nvgpu_cond_signal_interruptible(&dev->readout_wq);
687}
688
689void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
690{
691#ifdef CONFIG_GK20A_CTXSW_TRACE
692 struct nvgpu_ctxsw_trace_entry entry = {
693 .vmid = 0,
694 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
695 .context_id = 0,
696 .pid = ch->tgid,
697 };
698
699 if (!g->ctxsw_trace)
700 return;
701
702 g->ops.bus.read_ptimer(g, &entry.timestamp);
703 gk20a_ctxsw_trace_write(g, &entry);
704 gk20a_ctxsw_trace_wake_up(g, 0);
705#endif
706 trace_gk20a_channel_reset(ch->chid, ch->tsgid);
707}
708
709void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
710{
711#ifdef CONFIG_GK20A_CTXSW_TRACE
712 struct nvgpu_ctxsw_trace_entry entry = {
713 .vmid = 0,
714 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
715 .context_id = 0,
716 .pid = tsg->tgid,
717 };
718
719 if (!g->ctxsw_trace)
720 return;
721
722 g->ops.bus.read_ptimer(g, &entry.timestamp);
723 gk20a_ctxsw_trace_write(g, &entry);
724 gk20a_ctxsw_trace_wake_up(g, 0);
725#endif
726 trace_gk20a_channel_reset(~0, tsg->tsgid);
727}
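
The file removed above implemented the userland-facing ctxsw trace FIFO: a char device whose ioctls (NVGPU_CTXSW_IOCTL_TRACE_ENABLE/DISABLE, RING_SETUP, SET_FILTER, GET_FILTER, POLL) arm FECS tracing and whose read() drains struct nvgpu_ctxsw_trace_entry records from the shared ring. As a rough illustration of how a consumer drives that interface, here is a minimal userspace sketch; the device node path is hypothetical, and the ioctl numbers and entry layout are assumed to come from the nvgpu UAPI header (included in the removed file as <uapi/linux/nvgpu.h>, here referenced by its installed name).

/*
 * Hedged sketch of a ctxsw trace consumer. "/dev/nvhost-ctxsw-gpu" is a
 * hypothetical node name; all NVGPU_CTXSW_* symbols and the entry struct
 * are assumed to be provided by the nvgpu UAPI header.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed installed name of the UAPI header */

int main(void)
{
	struct nvgpu_ctxsw_trace_entry ents[64];
	struct pollfd pfd;
	ssize_t n;
	int fd, i, loops;

	fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY);	/* hypothetical path */
	if (fd < 0)
		return 1;

	/* The ring is allocated with a default size at open(); this arms writes. */
	if (ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE)) {
		close(fd);
		return 1;
	}

	pfd.fd = fd;
	pfd.events = POLLIN;

	for (loops = 0; loops < 10; loops++) {
		/* Ask the driver to flush pending FECS records into the ring. */
		ioctl(fd, NVGPU_CTXSW_IOCTL_POLL);

		if (poll(&pfd, 1, 1000) <= 0)
			continue;

		/* read() copies out whole nvgpu_ctxsw_trace_entry records. */
		n = read(fd, ents, sizeof(ents));
		if (n <= 0)
			break;
		for (i = 0; i < (int)(n / sizeof(ents[0])); i++)
			printf("seq=%u pid=%lld tag=%#x ts=%#llx\n",
			       (unsigned)ents[i].seqno,
			       (long long)ents[i].pid,
			       (unsigned)ents[i].tag,
			       (unsigned long long)ents[i].timestamp);
	}

	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_DISABLE);
	close(fd);
	return 0;
}

Reads block while the ring is empty unless the file is opened with O_NONBLOCK, matching the -EAGAIN path in gk20a_ctxsw_dev_read() above; the ring can also be mapped directly via mmap(), which is what the vma_open/vma_close reference counting supports.
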
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
index b270581b..dddb8603 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
@@ -23,6 +23,8 @@
 #ifndef __CTXSW_TRACE_GK20A_H
 #define __CTXSW_TRACE_GK20A_H
 
+#include <nvgpu/types.h>
+
 #define GK20A_CTXSW_TRACE_NUM_DEVS 1
 
 struct file;
@@ -41,20 +43,22 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
 int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
 long gk20a_ctxsw_dev_ioctl(struct file *filp,
 	unsigned int cmd, unsigned long arg);
-ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *);
-unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *);
-int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *);
+ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
+	size_t size, loff_t *offs);
+unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
+	struct poll_table_struct *pts);
+int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma);
 int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size);
 int gk20a_ctxsw_dev_ring_free(struct gk20a *g);
 int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma);
 
-int gk20a_ctxsw_trace_init(struct gk20a *);
-void gk20a_ctxsw_trace_cleanup(struct gk20a *);
-int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *);
+int gk20a_ctxsw_trace_init(struct gk20a *g);
+void gk20a_ctxsw_trace_cleanup(struct gk20a *g);
+int gk20a_ctxsw_trace_write(struct gk20a *g,
+	struct nvgpu_ctxsw_trace_entry *entry);
 void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid);
 
 void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch);
 void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg);
 
-
 #endif /* __CTXSW_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index d10af9e9..17ae626b 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1228,16 +1228,24 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
 		if (nvgpu_pmu_disable_elpg(g))
 			nvgpu_err(g, "failed to set disable elpg");
 	}
-	/* resetting engine will alter read/write index.
-	 * need to flush circular buffer before re-enabling FECS.
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	/*
+	 * Resetting engine will alter read/write index. Need to flush
+	 * circular buffer before re-enabling FECS.
 	 */
 	if (g->ops.fecs_trace.reset)
 		g->ops.fecs_trace.reset(g);
-	/*HALT_PIPELINE method, halt GR engine*/
+#endif
+
+	/* HALT_PIPELINE method, halt GR engine. */
 	if (gr_gk20a_halt_pipe(g))
 		nvgpu_err(g, "failed to HALT gr pipe");
-	/* resetting engine using mc_enable_r() is not
-	   enough, we do full init sequence */
+
+	/*
+	 * Resetting engine using mc_enable_r() is not enough; we must
+	 * do full init sequence.
+	 */
 	gk20a_gr_reset(g);
 	if (g->support_pmu && g->can_elpg)
 		nvgpu_pmu_enable_elpg(g);
@@ -1618,6 +1626,8 @@ static bool gk20a_fifo_handle_mmu_fault(
 			}
 		}
 	}
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	/*
 	 * For non fake mmu fault, both tsg and ch pointers
 	 * could be valid. Check tsg first.
@@ -1626,10 +1636,11 @@ static bool gk20a_fifo_handle_mmu_fault(
 		gk20a_ctxsw_trace_tsg_reset(g, tsg);
 	else if (ch)
 		gk20a_ctxsw_trace_channel_reset(g, ch);
+#endif
 
-	/* disable the channel/TSG from hw and increment
-	 * syncpoints */
-
+	/*
+	 * Disable the channel/TSG from hw and increment syncpoints.
+	 */
 	if (tsg) {
 		if (!g->fifo.deferred_reset_pending) {
 			if (!fake_fault)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 47f6c56c..703a7c0c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -295,9 +295,11 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		goto done;
 	}
 
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	err = gk20a_ctxsw_trace_init(g);
 	if (err)
 		nvgpu_warn(g, "could not initialize ctxsw tracing");
+#endif
 
 	err = gk20a_sched_ctrl_init(g);
 	if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d7fdffb0..a34f06b2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -689,18 +689,25 @@ struct gpu_ops {
 		int (*get_netlist_name)(struct gk20a *g, int index, char *name);
 		bool (*is_fw_defined)(void);
 	} gr_ctx;
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	/*
+	 * Currently only supported on Linux due to the extremely tight
+	 * integration with Linux device driver structure (in particular
+	 * mmap).
+	 */
 	struct {
 		int (*init)(struct gk20a *g);
 		int (*max_entries)(struct gk20a *,
-			struct nvgpu_ctxsw_trace_filter *);
+			struct nvgpu_ctxsw_trace_filter *filter);
 		int (*flush)(struct gk20a *g);
 		int (*poll)(struct gk20a *g);
 		int (*enable)(struct gk20a *g);
 		int (*disable)(struct gk20a *g);
 		bool (*is_enabled)(struct gk20a *g);
 		int (*reset)(struct gk20a *g);
-		int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
-		int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
+		int (*bind_channel)(struct gk20a *g, struct channel_gk20a *ch);
+		int (*unbind_channel)(struct gk20a *g,
+				struct channel_gk20a *ch);
 		int (*deinit)(struct gk20a *g);
 		int (*alloc_user_buffer)(struct gk20a *g,
 			void **buf, size_t *size);
@@ -710,6 +717,7 @@ struct gpu_ops {
 		int (*set_filter)(struct gk20a *g,
 			struct nvgpu_ctxsw_trace_filter *filter);
 	} fecs_trace;
+#endif
 	struct {
 		bool (*support_sparse)(struct gk20a *g);
 		u64 (*gmmu_map)(struct vm_gk20a *vm,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 1ea59a9d..f78d862c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3070,13 +3070,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 				"fail to load golden ctx image");
 			goto out;
 		}
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 		if (g->ops.fecs_trace.bind_channel && !c->vpr) {
 			err = g->ops.fecs_trace.bind_channel(g, c);
-			if (err) {
+			if (err)
 				nvgpu_warn(g,
 					"fail to bind channel for ctxsw trace");
-			}
 		}
+#endif
 		c->first_init = true;
 	}
 
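
Taken together, the change compiles the fecs_trace HAL and every call site out when CONFIG_GK20A_CTXSW_TRACE is not set. A common alternative idiom, sketched below purely for comparison and not taken from the nvgpu sources, is to keep call sites unconditional and provide no-op static inline stubs in the header when the option is disabled; the #ifdef-at-call-site approach used here instead keeps the ops struct itself out of gpu_ops for non-Linux builds.

/*
 * Illustrative idiom only: config-gated stubs keep callers free of #ifdefs.
 * The declarations mirror the nvgpu API above; the stub variants are a
 * hypothetical alternative, not part of this change.
 */
struct gk20a;

#ifdef CONFIG_GK20A_CTXSW_TRACE
int gk20a_ctxsw_trace_init(struct gk20a *g);
void gk20a_ctxsw_trace_cleanup(struct gk20a *g);
#else
static inline int gk20a_ctxsw_trace_init(struct gk20a *g)
{
	return 0;	/* tracing not built in; report success */
}
static inline void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
}
#endif
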