Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 586
1 file changed, 586 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
new file mode 100644
index 00000000..9e7c04ad
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -0,0 +1,586 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <asm/barrier.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/nvgpu.h>
#include <linux/hashtable.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
/* used directly below: vmalloc_user()/vfree(), copy_{to,from}_user(), poll_wait() */
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <uapi/linux/nvgpu.h>
#include "ctxsw_trace_gk20a.h"
#include "gk20a.h"
#include "gr_gk20a.h"
#include "hw_ctxsw_prog_gk20a.h"
#include "hw_gr_gk20a.h"

#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE	(128*PAGE_SIZE)

/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;

	struct nvgpu_ctxsw_ring_header *hdr;
	struct nvgpu_ctxsw_trace_entry *ents;
	struct nvgpu_ctxsw_trace_filter filter;
	bool write_enabled;
	wait_queue_head_t readout_wq;
	size_t size;

	atomic_t vma_ref;

	struct mutex lock;
};


struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};

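/*
 * Ring conventions: the buffer shared with userspace is a plain FIFO.
 * The producer only advances write_idx, the consumer only advances
 * read_idx, both wrap at num_ents, and one slot is always left unused so
 * that a full ring can be told apart from an empty one.
 */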
static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx == hdr->read_idx);
}

static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}

static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}

static inline int ring_space(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->read_idx - hdr->write_idx - 1) % hdr->num_ents;
}

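/*
 * read() hands out whole entries only and advances read_idx as it copies.
 * On an empty ring it blocks on readout_wq, or returns -EAGAIN when the
 * file was opened with O_NONBLOCK.
 */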
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry __user *) buf;
	size_t copied = 0;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	mutex_lock(&dev->lock);
	while (ring_is_empty(hdr)) {
		mutex_unlock(&dev->lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = wait_event_interruptible(dev->readout_wq,
			!ring_is_empty(hdr));
		if (err)
			return err;
		mutex_lock(&dev->lock);
	}

	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
			sizeof(*entry))) {
			mutex_unlock(&dev->lock);
			return -EFAULT;
		}

		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	*off = hdr->read_idx;
	mutex_unlock(&dev->lock);

	return copied;
}

static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
	dev->write_enabled = true;
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
	dev->write_enabled = false;
	return 0;
}

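/*
 * The ring is a single vmalloc_user() allocation: the header sits at the
 * start and the entry array follows right behind it (dev->ents = hdr + 1),
 * so the whole buffer can later be handed to userspace with one mmap().
 */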
static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev,
		size_t size)
{
	struct nvgpu_ctxsw_ring_header *hdr;

	/* the ring cannot be replaced while it is mapped or tracing is on */
	if (dev->write_enabled || atomic_read(&dev->vma_ref))
		return -EBUSY;

	size = roundup(size, PAGE_SIZE);
	hdr = vmalloc_user(size);
	if (!hdr)
		return -ENOMEM;

	if (dev->hdr)
		vfree(dev->hdr);

	dev->hdr = hdr;
	dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
	dev->size = size;

	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
	hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header))
		/ sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->drop_count = 0;
	hdr->read_idx = 0;
	hdr->write_idx = 0;
	hdr->write_seqno = 0;

	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
		dev->size, dev->hdr, dev->ents, hdr->num_ents);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_ring_setup_args *args)
{
	size_t size = args->size;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);

	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
		return -EINVAL;

	return gk20a_ctxsw_dev_ring_alloc(dev, size);
}

static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	dev->filter = args->filter;
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	args->filter = dev->filter;
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	/* flush is an optional per-chip hook; only call it when it is set */
	if (g->ops.fecs_trace.flush)
		err = g->ops.fecs_trace.flush(g);

	if (likely(!err))
		err = g->ops.fecs_trace.poll(g);

	gk20a_idle(g->dev);
	return err;
}

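/*
 * open() enables all trace events, sizes the default ring from
 * ops.fecs_trace.max_entries() for that filter, allocates it and turns on
 * FECS tracing. Only one opener per device: a non-NULL hdr means -EBUSY.
 */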
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
	struct gk20a *g;
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int err;
	size_t size;
	u32 n;

	/* only one VM for now */
	const int vmid = 0;

	g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	trace = g->ctxsw_trace;
	if (!trace) {
		err = -ENODEV;
		goto idle;
	}

	/* Allow only one user for this device */
	dev = &trace->devs[vmid];
	mutex_lock(&dev->lock);
	if (dev->hdr) {
		err = -EBUSY;
		goto done;
	}

	/* By default, allocate ring buffer big enough to accommodate
	 * FECS records with default event filter */

	/* enable all traces by default */
	NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);

	/* compute max number of entries generated with this filter */
	n = g->ops.fecs_trace.max_entries(g, &dev->filter);

	size = sizeof(struct nvgpu_ctxsw_ring_header) +
		n * sizeof(struct nvgpu_ctxsw_trace_entry);
	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
		size, n, sizeof(struct nvgpu_ctxsw_trace_entry));

	err = gk20a_ctxsw_dev_ring_alloc(dev, size);
	if (err)
		goto done;

	filp->private_data = dev;
	gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
		filp, dev, size);

	err = g->ops.fecs_trace.enable(g);

done:
	mutex_unlock(&dev->lock);

idle:
	gk20a_idle(g->dev);

	return err;
}

int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);

	mutex_lock(&dev->lock);
	dev->write_enabled = false;
	if (dev->hdr) {
		vfree(dev->hdr);
		dev->hdr = NULL;
	}

	g->ops.fecs_trace.disable(g);

	mutex_unlock(&dev->lock);

	return 0;
}

long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
	unsigned long arg)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
		|| (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST))
		return -EINVAL;

	BUG_ON(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE);

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	mutex_lock(&dev->lock);

	switch (cmd) {
	case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_RING_SETUP:
		err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
			(struct nvgpu_ctxsw_ring_setup_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_SET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_GET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_POLL:
		/* drop dev->lock: the poll path can call back into
		 * gk20a_ctxsw_trace_write(), which takes it again */
		mutex_unlock(&dev->lock);
		err = gk20a_ctxsw_dev_ioctl_poll(dev);
		mutex_lock(&dev->lock);
		break;
	default:
		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
			cmd);
		err = -ENOTTY;
	}

	mutex_unlock(&dev->lock);

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
		if (copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}

	return err;
}

unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	unsigned int mask = 0;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	mutex_lock(&dev->lock);
	poll_wait(filp, &dev->readout_wq, wait);
	if (!ring_is_empty(hdr))
		mask |= POLLIN | POLLRDNORM;
	mutex_unlock(&dev->lock);

	return mask;
}

static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;

	atomic_inc(&dev->vma_ref);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		atomic_read(&dev->vma_ref));
}

static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;

	atomic_dec(&dev->vma_ref);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		atomic_read(&dev->vma_ref));
}

static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	.open = gk20a_ctxsw_dev_vma_open,
	.close = gk20a_ctxsw_dev_vma_close,
};

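/*
 * mmap() maps the vmalloc'd ring straight into the caller's address space.
 * vma_ref counts live mappings (ring_alloc() refuses to replace a mapped
 * ring); the vm_ops above keep the count in sync across fork()/munmap(),
 * and the initial mapping invokes .open by hand since the core mmap path
 * does not do that for a freshly created vma.
 */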
int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	int ret;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
		vma->vm_start, vma->vm_end);

	ret = remap_vmalloc_range(vma, dev->hdr, 0);
	if (likely(!ret)) {
		vma->vm_private_data = dev;
		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
		vma->vm_ops->open(vma);
	}

	return ret;
}

#ifdef CONFIG_GK20A_CTXSW_TRACE
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	struct gk20a_ctxsw_dev *dev = trace->devs;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		init_waitqueue_head(&dev->readout_wq);
		mutex_init(&dev->lock);
		atomic_set(&dev->vma_ref, 0);
		dev++;
	}
	return 0;
}
#endif

int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	if (likely(trace))
		return 0;

	trace = kzalloc(sizeof(*trace), GFP_KERNEL);
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	kfree(trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}

void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	kfree(g->ctxsw_trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}

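/*
 * Producer side, fed by the FECS trace poll path: stamp a sequence number,
 * drop the record if writes are disabled, the ring is full or the tag is
 * filtered out, otherwise copy it into the ring and publish the new
 * write_idx (smp_wmb() orders the copy against the index update seen by a
 * concurrent mmap reader).
 */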
int gk20a_ctxsw_trace_write(struct gk20a *g,
		struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	mutex_lock(&dev->lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	gk20a_dbg(gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[hdr->write_idx] = *entry;

	/* ensure record is written before updating write index */
	smp_wmb();

	hdr->write_idx++;
	if (unlikely(hdr->write_idx >= hdr->num_ents))
		hdr->write_idx = 0;
	gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	mutex_unlock(&dev->lock);
	return ret;

drop:
	hdr->drop_count++;

filter:
	gk20a_dbg(gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	mutex_unlock(&dev->lock);
	return ret;
}

void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
{
	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[vmid];

	wake_up_interruptible(&dev->readout_wq);
}