path: root/drivers/gpu/nvgpu/gk20a
author     Alex Waterman <alexw@nvidia.com>                     2017-10-25 12:56:09 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-10-29 14:02:15 -0400
commit     4d2d890c01b94d10ad55643a4c2c159a98419efe (patch)
tree       1043316e5e0dc163da79ae17c089b7818dc54d4b /drivers/gpu/nvgpu/gk20a
parent     a681c505c96dba58231ac050e4c8f4f81d79540f (diff)
gpu: nvgpu: Move ctxsw_trace_gk20a.c to common/linux
Migrate ctxsw_trace_gk20a.c to common/linux/ctxsw_trace.c. This has been
done because the ctxsw tracing code is currently too tightly tied to the
Linux OS through its use of a couple of system calls:

  - poll()
  - mmap()

and of general Linux driver framework code. As a result, pulling the logic
out of the FECS tracing code is simply too large a scope for the time
being. Instead the code was copied over as directly as possible.

The HAL ops for the FECS code were hidden behind the FECS tracing config so
that vm_area_struct is not used when QNX does not define said config. All
other non-HAL functions called by the FECS ctxsw tracing code have now also
been hidden by this config. This is not pretty, but for the time being it
seems like the way to go.

JIRA NVGPU-287

Change-Id: Ib880ab237f4abd330dc66998692c86c4507149c2
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586547
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
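To illustrate the guard described above, here is a minimal sketch (not code
from this patch; the helper name and its placement are hypothetical) of how
a fecs_trace call site is compiled out when CONFIG_GK20A_CTXSW_TRACE is not
defined, so that non-Linux builds such as QNX never reference the fecs_trace
HAL ops or Linux-only types like struct vm_area_struct:

    /*
     * Hypothetical helper sketching the CONFIG_GK20A_CTXSW_TRACE guard used
     * at the fecs_trace call sites. Only ops that appear in this patch are
     * referenced; this helper itself does not exist in the driver.
     */
    #ifdef CONFIG_GK20A_CTXSW_TRACE
    static void trace_unbind_channel_if_enabled(struct gk20a *g,
                                                struct channel_gk20a *ch)
    {
            /* Mirrors the guarded call site in gk20a_free_channel(). */
            if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
                    g->ops.fecs_trace.unbind_channel(g, ch);
    }
    #else
    static void trace_unbind_channel_if_enabled(struct gk20a *g,
                                                struct channel_gk20a *ch)
    {
            /* Tracing compiled out: nothing to reference, nothing to do. */
    }
    #endif

The actual patch applies the #ifdef directly around the existing call sites
in channel_gk20a.c, fifo_gk20a.c, gk20a.c and gr_gk20a.c, as shown in the
hunks below.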
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c      |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c  | 727
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h  |  18
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c         |  27
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c              |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h              |  14
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c           |   5
7 files changed, 48 insertions, 747 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0d011b06..546f4164 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -553,8 +553,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 		gk20a_dbg_info("freeing bound channel context, timeout=%ld",
 				timeout);

+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
 		g->ops.fecs_trace.unbind_channel(g, ch);
+#endif

 	/* release channel ctx */
 	g->ops.gr.free_channel_ctx(ch, was_tsg);
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
deleted file mode 100644
index fb33de23..00000000
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ /dev/null
@@ -1,727 +0,0 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <asm/barrier.h>
24#include <linux/wait.h>
25#include <linux/ktime.h>
26#include <linux/uaccess.h>
27#include <linux/poll.h>
28#include <trace/events/gk20a.h>
29#include <uapi/linux/nvgpu.h>
30
31#include <nvgpu/kmem.h>
32
33#include "ctxsw_trace_gk20a.h"
34#include "gk20a.h"
35#include "platform_gk20a.h"
36#include "gr_gk20a.h"
37#include "common/linux/os_linux.h"
38
39#include <nvgpu/log.h>
40#include <nvgpu/atomic.h>
41#include <nvgpu/barrier.h>
42
43#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
44#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
45
46#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
47
48/* Userland-facing FIFO (one global + eventually one per VM) */
49struct gk20a_ctxsw_dev {
50 struct gk20a *g;
51
52 struct nvgpu_ctxsw_ring_header *hdr;
53 struct nvgpu_ctxsw_trace_entry *ents;
54 struct nvgpu_ctxsw_trace_filter filter;
55 bool write_enabled;
56 struct nvgpu_cond readout_wq;
57 size_t size;
58 u32 num_ents;
59
60 nvgpu_atomic_t vma_ref;
61
62 struct nvgpu_mutex write_lock;
63};
64
65
66struct gk20a_ctxsw_trace {
67 struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
68};
69
70static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
71{
72 return (hdr->write_idx == hdr->read_idx);
73}
74
75static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
76{
77 return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
78}
79
80static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
81{
82 return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
83}
84
85ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
86 loff_t *off)
87{
88 struct gk20a_ctxsw_dev *dev = filp->private_data;
89 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
90 struct nvgpu_ctxsw_trace_entry __user *entry =
91 (struct nvgpu_ctxsw_trace_entry *) buf;
92 size_t copied = 0;
93 int err;
94
95 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
96 "filp=%p buf=%p size=%zu", filp, buf, size);
97
98 nvgpu_mutex_acquire(&dev->write_lock);
99 while (ring_is_empty(hdr)) {
100 nvgpu_mutex_release(&dev->write_lock);
101 if (filp->f_flags & O_NONBLOCK)
102 return -EAGAIN;
103 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
104 !ring_is_empty(hdr), 0);
105 if (err)
106 return err;
107 nvgpu_mutex_acquire(&dev->write_lock);
108 }
109
110 while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
111 if (ring_is_empty(hdr))
112 break;
113
114 if (copy_to_user(entry, &dev->ents[hdr->read_idx],
115 sizeof(*entry))) {
116 nvgpu_mutex_release(&dev->write_lock);
117 return -EFAULT;
118 }
119
120 hdr->read_idx++;
121 if (hdr->read_idx >= hdr->num_ents)
122 hdr->read_idx = 0;
123
124 entry++;
125 copied += sizeof(*entry);
126 size -= sizeof(*entry);
127 }
128
129 gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
130 hdr->read_idx);
131
132 *off = hdr->read_idx;
133 nvgpu_mutex_release(&dev->write_lock);
134
135 return copied;
136}
137
138static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
139{
140 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
141 nvgpu_mutex_acquire(&dev->write_lock);
142 dev->write_enabled = true;
143 nvgpu_mutex_release(&dev->write_lock);
144 dev->g->ops.fecs_trace.enable(dev->g);
145 return 0;
146}
147
148static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
149{
150 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
151 dev->g->ops.fecs_trace.disable(dev->g);
152 nvgpu_mutex_acquire(&dev->write_lock);
153 dev->write_enabled = false;
154 nvgpu_mutex_release(&dev->write_lock);
155 return 0;
156}
157
158static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
159 size_t size)
160{
161 struct gk20a *g = dev->g;
162 void *buf;
163 int err;
164
165 if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
166 return -EBUSY;
167
168 err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
169 if (err)
170 return err;
171
172
173 dev->hdr = buf;
174 dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
175 dev->size = size;
176 dev->num_ents = dev->hdr->num_ents;
177
178 gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
179 dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
180 return 0;
181}
182
183int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
184 void **buf, size_t *size)
185{
186 struct nvgpu_ctxsw_ring_header *hdr;
187
188 *size = roundup(*size, PAGE_SIZE);
189 hdr = vmalloc_user(*size);
190 if (!hdr)
191 return -ENOMEM;
192
193 hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
194 hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
195 hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
196 / sizeof(struct nvgpu_ctxsw_trace_entry);
197 hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
198 hdr->drop_count = 0;
199 hdr->read_idx = 0;
200 hdr->write_idx = 0;
201 hdr->write_seqno = 0;
202
203 *buf = hdr;
204 return 0;
205}
206
207int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
208{
209 struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
210
211 nvgpu_vfree(g, dev->hdr);
212 return 0;
213}
214
215static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
216 struct nvgpu_ctxsw_ring_setup_args *args)
217{
218 size_t size = args->size;
219 int ret;
220
221 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
222
223 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
224 return -EINVAL;
225
226 nvgpu_mutex_acquire(&dev->write_lock);
227 ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
228 nvgpu_mutex_release(&dev->write_lock);
229
230 return ret;
231}
232
233static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
234 struct nvgpu_ctxsw_trace_filter_args *args)
235{
236 struct gk20a *g = dev->g;
237
238 nvgpu_mutex_acquire(&dev->write_lock);
239 dev->filter = args->filter;
240 nvgpu_mutex_release(&dev->write_lock);
241
242 if (g->ops.fecs_trace.set_filter)
243 g->ops.fecs_trace.set_filter(g, &dev->filter);
244 return 0;
245}
246
247static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
248 struct nvgpu_ctxsw_trace_filter_args *args)
249{
250 nvgpu_mutex_acquire(&dev->write_lock);
251 args->filter = dev->filter;
252 nvgpu_mutex_release(&dev->write_lock);
253
254 return 0;
255}
256
257static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
258{
259 struct gk20a *g = dev->g;
260 int err;
261
262 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
263
264 err = gk20a_busy(g);
265 if (err)
266 return err;
267
268 if (g->ops.fecs_trace.flush)
269 err = g->ops.fecs_trace.flush(g);
270
271 if (likely(!err))
272 err = g->ops.fecs_trace.poll(g);
273
274 gk20a_idle(g);
275 return err;
276}
277
278int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
279{
280 struct nvgpu_os_linux *l;
281 struct gk20a *g;
282 struct gk20a_ctxsw_trace *trace;
283 struct gk20a_ctxsw_dev *dev;
284 int err;
285 size_t size;
286 u32 n;
287
288 /* only one VM for now */
289 const int vmid = 0;
290
291 l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
292 g = gk20a_get(&l->g);
293 if (!g)
294 return -ENODEV;
295
296 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
297
298 if (!capable(CAP_SYS_ADMIN)) {
299 err = -EPERM;
300 goto free_ref;
301 }
302
303 err = gk20a_busy(g);
304 if (err)
305 goto free_ref;
306
307 trace = g->ctxsw_trace;
308 if (!trace) {
309 err = -ENODEV;
310 goto idle;
311 }
312
313 /* Allow only one user for this device */
314 dev = &trace->devs[vmid];
315 nvgpu_mutex_acquire(&dev->write_lock);
316 if (dev->hdr) {
317 err = -EBUSY;
318 goto done;
319 }
320
321 /* By default, allocate ring buffer big enough to accommodate
322 * FECS records with default event filter */
323
324 /* enable all traces by default */
325 NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
326
327 /* compute max number of entries generated with this filter */
328 n = g->ops.fecs_trace.max_entries(g, &dev->filter);
329
330 size = sizeof(struct nvgpu_ctxsw_ring_header) +
331 n * sizeof(struct nvgpu_ctxsw_trace_entry);
332 gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
333 size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
334
335 err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
336 if (!err) {
337 filp->private_data = dev;
338 gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
339 filp, dev, size);
340 }
341
342done:
343 nvgpu_mutex_release(&dev->write_lock);
344
345idle:
346 gk20a_idle(g);
347free_ref:
348 if (err)
349 gk20a_put(g);
350 return err;
351}
352
353int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
354{
355 struct gk20a_ctxsw_dev *dev = filp->private_data;
356 struct gk20a *g = dev->g;
357
358 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
359
360 g->ops.fecs_trace.disable(g);
361
362 nvgpu_mutex_acquire(&dev->write_lock);
363 dev->write_enabled = false;
364 nvgpu_mutex_release(&dev->write_lock);
365
366 if (dev->hdr) {
367 dev->g->ops.fecs_trace.free_user_buffer(dev->g);
368 dev->hdr = NULL;
369 }
370 gk20a_put(g);
371 return 0;
372}
373
374long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
375 unsigned long arg)
376{
377 struct gk20a_ctxsw_dev *dev = filp->private_data;
378 struct gk20a *g = dev->g;
379 u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
380 int err = 0;
381
382 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
383
384 if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
385 (_IOC_NR(cmd) == 0) ||
386 (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
387 (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
388 return -EINVAL;
389
390 memset(buf, 0, sizeof(buf));
391 if (_IOC_DIR(cmd) & _IOC_WRITE) {
392 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
393 return -EFAULT;
394 }
395
396 switch (cmd) {
397 case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
398 err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
399 break;
400 case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
401 err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
402 break;
403 case NVGPU_CTXSW_IOCTL_RING_SETUP:
404 err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
405 (struct nvgpu_ctxsw_ring_setup_args *) buf);
406 break;
407 case NVGPU_CTXSW_IOCTL_SET_FILTER:
408 err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
409 (struct nvgpu_ctxsw_trace_filter_args *) buf);
410 break;
411 case NVGPU_CTXSW_IOCTL_GET_FILTER:
412 err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
413 (struct nvgpu_ctxsw_trace_filter_args *) buf);
414 break;
415 case NVGPU_CTXSW_IOCTL_POLL:
416 err = gk20a_ctxsw_dev_ioctl_poll(dev);
417 break;
418 default:
419 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
420 cmd);
421 err = -ENOTTY;
422 }
423
424 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
425 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
426
427 return err;
428}
429
430unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
431{
432 struct gk20a_ctxsw_dev *dev = filp->private_data;
433 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
434 unsigned int mask = 0;
435
436 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
437
438 nvgpu_mutex_acquire(&dev->write_lock);
439 poll_wait(filp, &dev->readout_wq.wq, wait);
440 if (!ring_is_empty(hdr))
441 mask |= POLLIN | POLLRDNORM;
442 nvgpu_mutex_release(&dev->write_lock);
443
444 return mask;
445}
446
447static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
448{
449 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
450
451 nvgpu_atomic_inc(&dev->vma_ref);
452 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
453 nvgpu_atomic_read(&dev->vma_ref));
454}
455
456static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
457{
458 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
459
460 nvgpu_atomic_dec(&dev->vma_ref);
461 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
462 nvgpu_atomic_read(&dev->vma_ref));
463}
464
465static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
466 .open = gk20a_ctxsw_dev_vma_open,
467 .close = gk20a_ctxsw_dev_vma_close,
468};
469
470int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
471 struct vm_area_struct *vma)
472{
473 return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
474}
475
476int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
477{
478 struct gk20a_ctxsw_dev *dev = filp->private_data;
479 int ret;
480
481 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
482 vma->vm_start, vma->vm_end);
483
484 ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
485 if (likely(!ret)) {
486 vma->vm_private_data = dev;
487 vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
488 vma->vm_ops->open(vma);
489 }
490
491 return ret;
492}
493
494#ifdef CONFIG_GK20A_CTXSW_TRACE
495static int gk20a_ctxsw_init_devs(struct gk20a *g)
496{
497 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
498 struct gk20a_ctxsw_dev *dev = trace->devs;
499 int err;
500 int i;
501
502 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
503 dev->g = g;
504 dev->hdr = NULL;
505 dev->write_enabled = false;
506 nvgpu_cond_init(&dev->readout_wq);
507 err = nvgpu_mutex_init(&dev->write_lock);
508 if (err)
509 return err;
510 nvgpu_atomic_set(&dev->vma_ref, 0);
511 dev++;
512 }
513 return 0;
514}
515#endif
516
517int gk20a_ctxsw_trace_init(struct gk20a *g)
518{
519#ifdef CONFIG_GK20A_CTXSW_TRACE
520 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
521 int err;
522
523 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
524
525 /* if tracing is not supported, skip this */
526 if (!g->ops.fecs_trace.init)
527 return 0;
528
529 if (likely(trace))
530 return 0;
531
532 trace = nvgpu_kzalloc(g, sizeof(*trace));
533 if (unlikely(!trace))
534 return -ENOMEM;
535 g->ctxsw_trace = trace;
536
537 err = gk20a_ctxsw_init_devs(g);
538 if (err)
539 goto fail;
540
541 err = g->ops.fecs_trace.init(g);
542 if (unlikely(err))
543 goto fail;
544
545 return 0;
546
547fail:
548 memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
549 nvgpu_kfree(g, trace);
550 g->ctxsw_trace = NULL;
551 return err;
552#else
553 return 0;
554#endif
555}
556
557void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
558{
559#ifdef CONFIG_GK20A_CTXSW_TRACE
560 struct gk20a_ctxsw_trace *trace;
561 struct gk20a_ctxsw_dev *dev;
562 int i;
563
564 if (!g->ctxsw_trace)
565 return;
566
567 trace = g->ctxsw_trace;
568 dev = trace->devs;
569
570 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
571 nvgpu_mutex_destroy(&dev->write_lock);
572 dev++;
573 }
574
575 nvgpu_kfree(g, g->ctxsw_trace);
576 g->ctxsw_trace = NULL;
577
578 g->ops.fecs_trace.deinit(g);
579#endif
580}
581
582int gk20a_ctxsw_trace_write(struct gk20a *g,
583 struct nvgpu_ctxsw_trace_entry *entry)
584{
585 struct nvgpu_ctxsw_ring_header *hdr;
586 struct gk20a_ctxsw_dev *dev;
587 int ret = 0;
588 const char *reason;
589 u32 write_idx;
590
591 if (!g->ctxsw_trace)
592 return 0;
593
594 if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
595 return -ENODEV;
596
597 dev = &g->ctxsw_trace->devs[entry->vmid];
598 hdr = dev->hdr;
599
600 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
601 "dev=%p hdr=%p", dev, hdr);
602
603 nvgpu_mutex_acquire(&dev->write_lock);
604
605 if (unlikely(!hdr)) {
606 /* device has been released */
607 ret = -ENODEV;
608 goto done;
609 }
610
611 write_idx = hdr->write_idx;
612 if (write_idx >= dev->num_ents) {
613 nvgpu_err(dev->g,
614 "write_idx=%u out of range [0..%u]",
615 write_idx, dev->num_ents);
616 ret = -ENOSPC;
617 reason = "write_idx out of range";
618 goto disable;
619 }
620
621 entry->seqno = hdr->write_seqno++;
622
623 if (!dev->write_enabled) {
624 ret = -EBUSY;
625 reason = "write disabled";
626 goto drop;
627 }
628
629 if (unlikely(ring_is_full(hdr))) {
630 ret = -ENOSPC;
631 reason = "user fifo full";
632 goto drop;
633 }
634
635 if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
636 reason = "filtered out";
637 goto filter;
638 }
639
640 gk20a_dbg(gpu_dbg_ctxsw,
641 "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
642 entry->seqno, entry->context_id, entry->pid,
643 entry->tag, entry->timestamp);
644
645 dev->ents[write_idx] = *entry;
646
647 /* ensure record is written before updating write index */
648 nvgpu_smp_wmb();
649
650 write_idx++;
651 if (unlikely(write_idx >= hdr->num_ents))
652 write_idx = 0;
653 hdr->write_idx = write_idx;
654 gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
655 hdr->read_idx, hdr->write_idx, ring_len(hdr));
656
657 nvgpu_mutex_release(&dev->write_lock);
658 return ret;
659
660disable:
661 g->ops.fecs_trace.disable(g);
662
663drop:
664 hdr->drop_count++;
665
666filter:
667 gk20a_dbg(gpu_dbg_ctxsw,
668 "dropping seqno=%d context_id=%08x pid=%lld "
669 "tag=%x time=%llx (%s)",
670 entry->seqno, entry->context_id, entry->pid,
671 entry->tag, entry->timestamp, reason);
672
673done:
674 nvgpu_mutex_release(&dev->write_lock);
675 return ret;
676}
677
678void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
679{
680 struct gk20a_ctxsw_dev *dev;
681
682 if (!g->ctxsw_trace)
683 return;
684
685 dev = &g->ctxsw_trace->devs[vmid];
686 nvgpu_cond_signal_interruptible(&dev->readout_wq);
687}
688
689void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
690{
691#ifdef CONFIG_GK20A_CTXSW_TRACE
692 struct nvgpu_ctxsw_trace_entry entry = {
693 .vmid = 0,
694 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
695 .context_id = 0,
696 .pid = ch->tgid,
697 };
698
699 if (!g->ctxsw_trace)
700 return;
701
702 g->ops.bus.read_ptimer(g, &entry.timestamp);
703 gk20a_ctxsw_trace_write(g, &entry);
704 gk20a_ctxsw_trace_wake_up(g, 0);
705#endif
706 trace_gk20a_channel_reset(ch->chid, ch->tsgid);
707}
708
709void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
710{
711#ifdef CONFIG_GK20A_CTXSW_TRACE
712 struct nvgpu_ctxsw_trace_entry entry = {
713 .vmid = 0,
714 .tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
715 .context_id = 0,
716 .pid = tsg->tgid,
717 };
718
719 if (!g->ctxsw_trace)
720 return;
721
722 g->ops.bus.read_ptimer(g, &entry.timestamp);
723 gk20a_ctxsw_trace_write(g, &entry);
724 gk20a_ctxsw_trace_wake_up(g, 0);
725#endif
726 trace_gk20a_channel_reset(~0, tsg->tsgid);
727}
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
index b270581b..dddb8603 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
@@ -23,6 +23,8 @@
 #ifndef __CTXSW_TRACE_GK20A_H
 #define __CTXSW_TRACE_GK20A_H

+#include <nvgpu/types.h>
+
 #define GK20A_CTXSW_TRACE_NUM_DEVS 1

 struct file;
@@ -41,20 +43,22 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
 int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
 long gk20a_ctxsw_dev_ioctl(struct file *filp,
 	unsigned int cmd, unsigned long arg);
-ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *);
-unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *);
-int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *);
+ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
+	size_t size, loff_t *offs);
+unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
+	struct poll_table_struct *pts);
+int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma);
 int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size);
 int gk20a_ctxsw_dev_ring_free(struct gk20a *g);
 int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma);

-int gk20a_ctxsw_trace_init(struct gk20a *);
-void gk20a_ctxsw_trace_cleanup(struct gk20a *);
-int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *);
+int gk20a_ctxsw_trace_init(struct gk20a *g);
+void gk20a_ctxsw_trace_cleanup(struct gk20a *g);
+int gk20a_ctxsw_trace_write(struct gk20a *g,
+	struct nvgpu_ctxsw_trace_entry *entry);
 void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid);

 void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch);
 void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg);

-
 #endif /* __CTXSW_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index d10af9e9..17ae626b 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1228,16 +1228,24 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
 		if (nvgpu_pmu_disable_elpg(g))
 			nvgpu_err(g, "failed to set disable elpg");
 	}
-	/* resetting engine will alter read/write index.
-	 * need to flush circular buffer before re-enabling FECS.
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	/*
+	 * Resetting engine will alter read/write index. Need to flush
+	 * circular buffer before re-enabling FECS.
 	 */
 	if (g->ops.fecs_trace.reset)
 		g->ops.fecs_trace.reset(g);
-	/*HALT_PIPELINE method, halt GR engine*/
+#endif
+
+	/* HALT_PIPELINE method, halt GR engine. */
 	if (gr_gk20a_halt_pipe(g))
 		nvgpu_err(g, "failed to HALT gr pipe");
-	/* resetting engine using mc_enable_r() is not
-	   enough, we do full init sequence */
+
+	/*
+	 * Resetting engine using mc_enable_r() is not enough; we must
+	 * do full init sequence.
+	 */
 	gk20a_gr_reset(g);
 	if (g->support_pmu && g->can_elpg)
 		nvgpu_pmu_enable_elpg(g);
@@ -1618,6 +1626,8 @@ static bool gk20a_fifo_handle_mmu_fault(
 			}
 		}
 	}
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	/*
 	 * For non fake mmu fault, both tsg and ch pointers
 	 * could be valid. Check tsg first.
@@ -1626,10 +1636,11 @@ static bool gk20a_fifo_handle_mmu_fault(
 		gk20a_ctxsw_trace_tsg_reset(g, tsg);
 	else if (ch)
 		gk20a_ctxsw_trace_channel_reset(g, ch);
+#endif

-	/* disable the channel/TSG from hw and increment
-	 * syncpoints */
-
+	/*
+	 * Disable the channel/TSG from hw and increment syncpoints.
+	 */
 	if (tsg) {
 		if (!g->fifo.deferred_reset_pending) {
 			if (!fake_fault)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 47f6c56c..703a7c0c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -295,9 +295,11 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		goto done;
 	}

+#ifdef CONFIG_GK20A_CTXSW_TRACE
 	err = gk20a_ctxsw_trace_init(g);
 	if (err)
 		nvgpu_warn(g, "could not initialize ctxsw tracing");
+#endif

 	err = gk20a_sched_ctrl_init(g);
 	if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d7fdffb0..a34f06b2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -689,18 +689,25 @@ struct gpu_ops {
 		int (*get_netlist_name)(struct gk20a *g, int index, char *name);
 		bool (*is_fw_defined)(void);
 	} gr_ctx;
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	/*
+	 * Currently only supported on Linux due to the extremely tight
+	 * integration with Linux device driver structure (in particular
+	 * mmap).
+	 */
 	struct {
 		int (*init)(struct gk20a *g);
 		int (*max_entries)(struct gk20a *,
-			struct nvgpu_ctxsw_trace_filter *);
+			struct nvgpu_ctxsw_trace_filter *filter);
 		int (*flush)(struct gk20a *g);
 		int (*poll)(struct gk20a *g);
 		int (*enable)(struct gk20a *g);
 		int (*disable)(struct gk20a *g);
 		bool (*is_enabled)(struct gk20a *g);
 		int (*reset)(struct gk20a *g);
-		int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
-		int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
+		int (*bind_channel)(struct gk20a *g, struct channel_gk20a *ch);
+		int (*unbind_channel)(struct gk20a *g,
+				struct channel_gk20a *ch);
 		int (*deinit)(struct gk20a *g);
 		int (*alloc_user_buffer)(struct gk20a *g,
 			void **buf, size_t *size);
@@ -710,6 +717,7 @@ struct gpu_ops {
 		int (*set_filter)(struct gk20a *g,
 			struct nvgpu_ctxsw_trace_filter *filter);
 	} fecs_trace;
+#endif
 	struct {
 		bool (*support_sparse)(struct gk20a *g);
 		u64 (*gmmu_map)(struct vm_gk20a *vm,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 1ea59a9d..f78d862c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3070,13 +3070,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
3070 "fail to load golden ctx image"); 3070 "fail to load golden ctx image");
3071 goto out; 3071 goto out;
3072 } 3072 }
3073#ifdef CONFIG_GK20A_CTXSW_TRACE
3073 if (g->ops.fecs_trace.bind_channel && !c->vpr) { 3074 if (g->ops.fecs_trace.bind_channel && !c->vpr) {
3074 err = g->ops.fecs_trace.bind_channel(g, c); 3075 err = g->ops.fecs_trace.bind_channel(g, c);
3075 if (err) { 3076 if (err)
3076 nvgpu_warn(g, 3077 nvgpu_warn(g,
3077 "fail to bind channel for ctxsw trace"); 3078 "fail to bind channel for ctxsw trace");
3078 }
3079 } 3079 }
3080#endif
3080 c->first_init = true; 3081 c->first_init = true;
3081 } 3082 }
3082 3083