author	Anton Vorontsov <avorontsov@nvidia.com>	2015-08-19 17:27:51 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-03-23 10:48:47 -0400
commit	1c40d09c4c9c011c1318c328c0b4b6b17d1f537e (patch)
tree	8b93fcd00739f9ada9302f06175278c9cb1d6785 /drivers/gpu/nvgpu/gk20a
parent	82da6ed595a87c8a3038eecd75880ab21dd4c5de (diff)
gpu: nvgpu: Add support for FECS ctxsw tracing
bug 1648908

This commit adds support for FECS ctxsw tracing. Code is compiled
conditionally under CONFIG_GK20A_CTXSW_TRACE.

This feature requires an updated FECS ucode that writes one record to a
ring buffer on each context switch. On RM/Kernel side, the GPU driver
reads records from the master ring buffer and generates trace entries
into a user-facing VM ring buffer. For each record in the master ring
buffer, RM/Kernel has to retrieve the vmid+pid of the user process that
submitted related work.

Features currently implemented:
- master ring buffer allocation
- debugfs to dump master ring buffer
- FECS record per context switch (with both current and new contexts)
- dedicated device for ctxsw tracing (access to VM ring buffer)
- SOF generation (and access to PTIMER)
- VM ring buffer allocation, and reconfiguration
- enable/disable tracing at user level
- event-based trace filtering
- context_ptr to vmid+pid mapping
- read system call for ctxsw dev
- mmap system call for ctxsw dev (direct access to VM ring buffer)
- poll system call for ctxsw dev
- save/restore register on ELPG/CG6
- separate user ring from FECS ring handling

Features requiring ucode changes:
- enable/disable tracing at FECS level
- actual busy time on engine (bug 1642354)
- master ring buffer threshold interrupt (P1)
- API for GPU to CPU timestamp conversion (P1)
- vmid/pid/uid based filtering (P1)

Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1022737
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
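For readers unfamiliar with the interface this patch creates, the intended
userspace flow is roughly: open the ctxsw device, enable tracing, then
alternately drain the FECS master ring (POLL ioctl) and consume entries from
the VM ring via read(2), or mmap(2) for direct access. The sketch below is
illustrative only and not part of the patch; it assumes the uapi definitions
added in <uapi/linux/nvgpu.h>, and the device node name
/dev/nvhost-ctxsw-gpu is a guess derived from the "-ctxsw" suffix passed to
gk20a_create_device().

/* Hypothetical userspace consumer of the ctxsw trace device. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_ctxsw_trace_entry e;
	/* assumed node name; O_NONBLOCK so read() returns -EAGAIN when empty */
	int fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY | O_NONBLOCK);

	if (fd < 0)
		return 1;

	/* open() already sized the VM ring and started FECS polling;
	 * this ioctl additionally allows writes into the user ring */
	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE);

	for (;;) {
		/* drain the FECS master ring into the VM ring */
		ioctl(fd, NVGPU_CTXSW_IOCTL_POLL);

		while (read(fd, &e, sizeof(e)) == (ssize_t)sizeof(e))
			printf("tag=%02x pid=%d ts=%llx\n",
			       (unsigned)e.tag, (int)e.pid,
			       (unsigned long long)e.timestamp);
		sleep(1);
	}

	close(fd);
	return 0;
}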
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c4
-rw-r--r--drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c586
-rw-r--r--drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h41
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c763
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h20
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c19
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c49
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h26
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c8
-rw-r--r--drivers/gpu/nvgpu/gk20a/hal_gk20a.c2
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h190
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h2
12 files changed, 1700 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 9b1f2987..0dd1fb8b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -28,6 +28,7 @@
 #include <linux/vmalloc.h>
 
 #include "debug_gk20a.h"
+#include "ctxsw_trace_gk20a.h"
 
 #include "gk20a.h"
 #include "dbg_gpu_gk20a.h"
@@ -920,6 +921,9 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 
 	gk20a_free_error_notifiers(ch);
 
+	if (g->ops.fecs_trace.unbind_channel)
+		g->ops.fecs_trace.unbind_channel(g, ch);
+
 	/* release channel ctx */
 	g->ops.gr.free_channel_ctx(ch);
 
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
new file mode 100644
index 00000000..9e7c04ad
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -0,0 +1,586 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <asm/barrier.h>
15#include <linux/slab.h>
16#include <linux/kthread.h>
17#include <linux/circ_buf.h>
18#include <linux/delay.h>
19#include <linux/jiffies.h>
20#include <linux/wait.h>
21#include <linux/ktime.h>
22#include <linux/nvgpu.h>
23#include <linux/hashtable.h>
24#include <linux/debugfs.h>
25#include <linux/log2.h>
26#include <uapi/linux/nvgpu.h>
27#include "ctxsw_trace_gk20a.h"
28#include "gk20a.h"
29#include "gr_gk20a.h"
30#include "hw_ctxsw_prog_gk20a.h"
31#include "hw_gr_gk20a.h"
32
33#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
34
35/* Userland-facing FIFO (one global + eventually one per VM) */
36struct gk20a_ctxsw_dev {
37 struct gk20a *g;
38
39 struct nvgpu_ctxsw_ring_header *hdr;
40 struct nvgpu_ctxsw_trace_entry *ents;
41 struct nvgpu_ctxsw_trace_filter filter;
42 bool write_enabled;
43 wait_queue_head_t readout_wq;
44 size_t size;
45
46 atomic_t vma_ref;
47
48 struct mutex lock;
49};
50
51
52struct gk20a_ctxsw_trace {
53 struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
54};
55
56static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
57{
58 return (hdr->write_idx == hdr->read_idx);
59}
60
61static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
62{
63 return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
64}
65
66static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
67{
68 return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
69}
70
71static inline int ring_space(struct nvgpu_ctxsw_ring_header *hdr)
72{
73 return (hdr->read_idx - hdr->write_idx - 1) % hdr->num_ents;
74}
75
76ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
77 loff_t *off)
78{
79 struct gk20a_ctxsw_dev *dev = filp->private_data;
80 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
81 struct nvgpu_ctxsw_trace_entry __user *entry =
82 (struct nvgpu_ctxsw_trace_entry *) buf;
83 size_t copied = 0;
84 int err;
85
86 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
87 "filp=%p buf=%p size=%zu", filp, buf, size);
88
89 mutex_lock(&dev->lock);
90 while (ring_is_empty(hdr)) {
91 mutex_unlock(&dev->lock);
92 if (filp->f_flags & O_NONBLOCK)
93 return -EAGAIN;
94 err = wait_event_interruptible(dev->readout_wq,
95 !ring_is_empty(hdr));
96 if (err)
97 return err;
98 mutex_lock(&dev->lock);
99 }
100
101 while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
102 if (ring_is_empty(hdr))
103 break;
104
105 if (copy_to_user(entry, &dev->ents[hdr->read_idx],
106 sizeof(*entry))) {
107 mutex_unlock(&dev->lock);
108 return -EFAULT;
109 }
110
111 hdr->read_idx++;
112 if (hdr->read_idx >= hdr->num_ents)
113 hdr->read_idx = 0;
114
115 entry++;
116 copied += sizeof(*entry);
117 size -= sizeof(*entry);
118 }
119
120 gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
121 hdr->read_idx);
122
123 *off = hdr->read_idx;
124 mutex_unlock(&dev->lock);
125
126 return copied;
127}
128
129static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
130{
131 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
132 dev->write_enabled = true;
133 return 0;
134}
135
136static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
137{
138 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
139 dev->write_enabled = false;
140 return 0;
141}
142
143static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev,
144 size_t size)
145{
146 struct nvgpu_ctxsw_ring_header *hdr;
147
148 if (atomic_read(&dev->vma_ref))
149 return -EBUSY;
150
151 if ((dev->write_enabled) || (atomic_read(&dev->vma_ref)))
152 return -EBUSY;
153
154 size = roundup(size, PAGE_SIZE);
155 hdr = vmalloc_user(size);
156 if (!hdr)
157 return -ENOMEM;
158
159 if (dev->hdr)
160 vfree(dev->hdr);
161
162 dev->hdr = hdr;
163 dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
164 dev->size = size;
165
166 hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
167 hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
168 hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header))
169 / sizeof(struct nvgpu_ctxsw_trace_entry);
170 hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
171 hdr->drop_count = 0;
172 hdr->read_idx = 0;
173 hdr->write_idx = 0;
174 hdr->write_seqno = 0;
175
176 gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
177 dev->size, dev->hdr, dev->ents, hdr->num_ents);
178 return 0;
179}
180
181static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
182 struct nvgpu_ctxsw_ring_setup_args *args)
183{
184 size_t size = args->size;
185
186 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
187
188 if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
189 return -EINVAL;
190
191 return gk20a_ctxsw_dev_ring_alloc(dev, size);
192}
193
194static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
195 struct nvgpu_ctxsw_trace_filter_args *args)
196{
197 dev->filter = args->filter;
198 return 0;
199}
200
201static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
202 struct nvgpu_ctxsw_trace_filter_args *args)
203{
204 args->filter = dev->filter;
205 return 0;
206}
207
208static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
209{
210 struct gk20a *g = dev->g;
211 int err;
212
213 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
214
215 err = gk20a_busy(g->dev);
216 if (err)
217 return err;
218
219 if (g->ops.fecs_trace.flush)
220 err = g->ops.fecs_trace.flush(g);
221
222 if (likely(!err))
223 err = g->ops.fecs_trace.poll(g);
224
225 gk20a_idle(g->dev);
226 return err;
227}
228
229int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
230{
231 struct gk20a *g;
232 struct gk20a_ctxsw_trace *trace;
233 struct gk20a_ctxsw_dev *dev;
234 int err;
235 size_t size;
236 u32 n;
237
238 /* only one VM for now */
239 const int vmid = 0;
240
241 g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);
242 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
243
244 if (!capable(CAP_SYS_ADMIN))
245 return -EPERM;
246
247 err = gk20a_busy(g->dev);
248 if (err)
249 return err;
250
251 trace = g->ctxsw_trace;
252 if (!trace) {
253 err = -ENODEV;
254 goto idle;
255 }
256
257 /* Allow only one user for this device */
258 dev = &trace->devs[vmid];
259 mutex_lock(&dev->lock);
260 if (dev->hdr) {
261 err = -EBUSY;
262 goto done;
263 }
264
265 /* By default, allocate ring buffer big enough to accommodate
266 * FECS records with default event filter */
267
268 /* enable all traces by default */
269 NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
270
271 /* compute max number of entries generated with this filter */
272 n = g->ops.fecs_trace.max_entries(g, &dev->filter);
273
274 size = sizeof(struct nvgpu_ctxsw_ring_header) +
275 n * sizeof(struct nvgpu_ctxsw_trace_entry);
276 gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
277 size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
278
279 err = gk20a_ctxsw_dev_ring_alloc(dev, size);
280 if (!err) {
281 filp->private_data = dev;
282 gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
283 filp, dev, size);
284 }
285
286 err = g->ops.fecs_trace.enable(g);
287
288done:
289 mutex_unlock(&dev->lock);
290
291idle:
292 gk20a_idle(g->dev);
293
294 return err;
295}
296
297int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
298{
299 struct gk20a_ctxsw_dev *dev = filp->private_data;
300 struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);
301
302 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
303
304 mutex_lock(&dev->lock);
305 dev->write_enabled = false;
306 if (dev->hdr) {
307 vfree(dev->hdr);
308 dev->hdr = NULL;
309 }
310
311 g->ops.fecs_trace.disable(g);
312
313 mutex_unlock(&dev->lock);
314
315 return 0;
316}
317
318long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
319 unsigned long arg)
320{
321 struct gk20a_ctxsw_dev *dev = filp->private_data;
322 struct gk20a *g = dev->g;
323 u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
324 int err = 0;
325
326 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
327
328 if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
329 || (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST))
330 return -EINVAL;
331
332 BUG_ON(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE);
333
334 memset(buf, 0, sizeof(buf));
335 if (_IOC_DIR(cmd) & _IOC_WRITE) {
336 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
337 return -EFAULT;
338 }
339
340 mutex_lock(&dev->lock);
341
342 switch (cmd) {
343 case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
344 err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
345 break;
346 case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
347 err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
348 break;
349 case NVGPU_CTXSW_IOCTL_RING_SETUP:
350 err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
351 (struct nvgpu_ctxsw_ring_setup_args *) buf);
352 break;
353 case NVGPU_CTXSW_IOCTL_SET_FILTER:
354 err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
355 (struct nvgpu_ctxsw_trace_filter_args *) buf);
356 break;
357 case NVGPU_CTXSW_IOCTL_GET_FILTER:
358 err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
359 (struct nvgpu_ctxsw_trace_filter_args *) buf);
360 break;
361 case NVGPU_CTXSW_IOCTL_POLL:
362 mutex_unlock(&dev->lock);
363 err = gk20a_ctxsw_dev_ioctl_poll(dev);
364 mutex_lock(&dev->lock);
365 break;
366 default:
367 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
368 cmd);
369 err = -ENOTTY;
370 }
371
372 mutex_unlock(&dev->lock);
373
374 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
375 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
376
377 return err;
378}
379
380unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
381{
382 struct gk20a_ctxsw_dev *dev = filp->private_data;
383 struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
384 unsigned int mask = 0;
385
386 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
387
388 mutex_lock(&dev->lock);
389 poll_wait(filp, &dev->readout_wq, wait);
390 if (!ring_is_empty(hdr))
391 mask |= POLLIN | POLLRDNORM;
392 mutex_unlock(&dev->lock);
393
394 return mask;
395}
396
397static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
398{
399 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
400
401 atomic_inc(&dev->vma_ref);
402 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
403 atomic_read(&dev->vma_ref));
404}
405
406static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
407{
408 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
409
410 atomic_dec(&dev->vma_ref);
411 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
412 atomic_read(&dev->vma_ref));
413}
414
415static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
416 .open = gk20a_ctxsw_dev_vma_open,
417 .close = gk20a_ctxsw_dev_vma_close,
418};
419
420int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
421{
422 struct gk20a_ctxsw_dev *dev = filp->private_data;
423 int ret;
424
425 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
426 vma->vm_start, vma->vm_end);
427
428 ret = remap_vmalloc_range(vma, dev->hdr, 0);
429 if (likely(!ret)) {
430 vma->vm_private_data = dev;
431 vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
432 vma->vm_ops->open(vma);
433 }
434
435 return ret;
436}
437
438#ifdef CONFIG_GK20A_CTXSW_TRACE
439static int gk20a_ctxsw_init_devs(struct gk20a *g)
440{
441 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
442 struct gk20a_ctxsw_dev *dev = trace->devs;
443 int i;
444
445 for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
446 dev->g = g;
447 dev->hdr = NULL;
448 dev->write_enabled = false;
449 init_waitqueue_head(&dev->readout_wq);
450 mutex_init(&dev->lock);
451 atomic_set(&dev->vma_ref, 0);
452 dev++;
453 }
454 return 0;
455}
456#endif
457
458int gk20a_ctxsw_trace_init(struct gk20a *g)
459{
460#ifdef CONFIG_GK20A_CTXSW_TRACE
461 struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
462 int err;
463
464 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
465
466 if (likely(trace))
467 return 0;
468
469 trace = kzalloc(sizeof(*trace), GFP_KERNEL);
470 if (unlikely(!trace))
471 return -ENOMEM;
472 g->ctxsw_trace = trace;
473
474 err = gk20a_ctxsw_init_devs(g);
475 if (err)
476 goto fail;
477
478 err = g->ops.fecs_trace.init(g);
479 if (unlikely(err))
480 goto fail;
481
482 return 0;
483
484fail:
485 kfree(trace);
486 g->ctxsw_trace = NULL;
487 return err;
488#else
489 return 0;
490#endif
491}
492
493void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
494{
495#ifdef CONFIG_GK20A_CTXSW_TRACE
496 kfree(g->ctxsw_trace);
497 g->ctxsw_trace = NULL;
498
499 g->ops.fecs_trace.deinit(g);
500#endif
501}
502
503int gk20a_ctxsw_trace_write(struct gk20a *g,
504 struct nvgpu_ctxsw_trace_entry *entry)
505{
506 struct nvgpu_ctxsw_ring_header *hdr;
507 struct gk20a_ctxsw_dev *dev;
508 int ret = 0;
509 const char *reason;
510
511 if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
512 return -ENODEV;
513
514 dev = &g->ctxsw_trace->devs[entry->vmid];
515 hdr = dev->hdr;
516
517 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
518 "dev=%p hdr=%p", dev, hdr);
519
520 mutex_lock(&dev->lock);
521
522 if (unlikely(!hdr)) {
523 /* device has been released */
524 ret = -ENODEV;
525 goto done;
526 }
527
528 entry->seqno = hdr->write_seqno++;
529
530 if (!dev->write_enabled) {
531 ret = -EBUSY;
532 reason = "write disabled";
533 goto drop;
534 }
535
536 if (unlikely(ring_is_full(hdr))) {
537 ret = -ENOSPC;
538 reason = "user fifo full";
539 goto drop;
540 }
541
542 if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
543 reason = "filtered out";
544 goto filter;
545 }
546
547 gk20a_dbg(gpu_dbg_ctxsw,
548 "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
549 entry->seqno, entry->context_id, entry->pid,
550 entry->tag, entry->timestamp);
551
552 dev->ents[hdr->write_idx] = *entry;
553
554 /* ensure record is written before updating write index */
555 smp_wmb();
556
557 hdr->write_idx++;
558 if (unlikely(hdr->write_idx >= hdr->num_ents))
559 hdr->write_idx = 0;
560 gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
561 hdr->read_idx, hdr->write_idx, ring_len(hdr));
562
563 mutex_unlock(&dev->lock);
564 return ret;
565
566drop:
567 hdr->drop_count++;
568
569filter:
570 gk20a_dbg(gpu_dbg_ctxsw,
571 "dropping seqno=%d context_id=%08x pid=%lld "
572 "tag=%x time=%llx (%s)",
573 entry->seqno, entry->context_id, entry->pid,
574 entry->tag, entry->timestamp, reason);
575
576done:
577 mutex_unlock(&dev->lock);
578 return ret;
579}
580
581void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
582{
583 struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[vmid];
584
585 wake_up_interruptible(&dev->readout_wq);
586}
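A note on the ring arithmetic above: num_ents is derived from the buffer
size and need not be a power of two, so the helpers use an explicit modulo,
and one slot is always left empty so that a full ring
((write_idx + 1) % num_ents == read_idx) can be told apart from an empty one
(write_idx == read_idx). Below is a standalone sketch of the same invariants
(hypothetical names, not from the patch); adding n before the modulo keeps
the unsigned arithmetic correct even when the write index has wrapped below
the read index.

#include <assert.h>

/* entries available to the reader */
static unsigned ring_len(unsigned wr, unsigned rd, unsigned n)
{
	return (wr + n - rd) % n;
}

/* slots the writer may still fill (one slot stays empty) */
static unsigned ring_space(unsigned wr, unsigned rd, unsigned n)
{
	return (rd + n - wr - 1) % n;
}

int main(void)
{
	/* 10-entry ring: at most 9 entries can ever be in flight */
	assert(ring_len(0, 0, 10) == 0);	/* empty */
	assert(ring_space(0, 0, 10) == 9);
	assert(ring_len(9, 0, 10) == 9);	/* full: (wr+1)%n == rd */
	assert(ring_space(9, 0, 10) == 0);
	return 0;
}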
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
new file mode 100644
index 00000000..c57d95d1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h
@@ -0,0 +1,41 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#ifndef __CTXSW_TRACE_GK20A_H
15#define __CTXSW_TRACE_GK20A_H
16
17#define GK20A_CTXSW_TRACE_NUM_DEVS 1
18
19struct gk20a;
20struct nvgpu_ctxsw_trace_entry;
21struct channel_gk20a;
22struct channel_ctx_gk20a;
23struct gk20a_ctxsw_dev;
24struct gk20a_fecs_trace;
25
26
27int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
28int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
29long gk20a_ctxsw_dev_ioctl(struct file *filp,
30 unsigned int cmd, unsigned long arg);
31ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *);
32unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *);
33int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *);
34
35int gk20a_ctxsw_trace_init(struct gk20a *);
36int gk20a_ctxsw_trace_setup(struct gk20a *, void *ctx_ptr);
37void gk20a_ctxsw_trace_cleanup(struct gk20a *);
38int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *);
39void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid);
40
41#endif /* __CTXSW_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
new file mode 100644
index 00000000..bac36403
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -0,0 +1,763 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <asm/barrier.h>
15#include <linux/slab.h>
16#include <linux/kthread.h>
17#include <linux/circ_buf.h>
18#include <linux/delay.h>
19#include <linux/jiffies.h>
20#include <linux/wait.h>
21#include <linux/ktime.h>
22#include <linux/nvgpu.h>
23#include <linux/hashtable.h>
24#include <linux/debugfs.h>
25#include <linux/log2.h>
26#include <uapi/linux/nvgpu.h>
27#include "ctxsw_trace_gk20a.h"
28#include "fecs_trace_gk20a.h"
29#include "gk20a.h"
30#include "gr_gk20a.h"
31#include "hw_ctxsw_prog_gk20a.h"
32#include "hw_gr_gk20a.h"
33
34/*
35 * If HW circular buffer is getting too many "buffer full" conditions,
36 * increasing this constant should help (it drives Linux' internal buffer size).
37 */
38#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6)
39#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
40#define GK20A_FECS_TRACE_FRAME_PERIOD_NS (1000000000ULL/60ULL)
41#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
42
43struct gk20a_fecs_trace_record {
44 u32 magic_lo;
45 u32 magic_hi;
46 u32 context_id;
47 u32 context_ptr;
48 u32 new_context_id;
49 u32 new_context_ptr;
50 u64 ts[];
51};
52
53struct gk20a_fecs_trace_hash_ent {
54 u32 context_ptr;
55 pid_t pid;
56 struct hlist_node node;
57};
58
59struct gk20a_fecs_trace {
60
61 struct mem_desc trace_buf;
62 DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
63 struct mutex hash_lock;
64 struct mutex poll_lock;
65 u64 sof;
66 u32 sof_mask; /* did we already send a SOF for this VM */
67
68 struct task_struct *poll_task;
69};
70
71#ifdef CONFIG_GK20A_CTXSW_TRACE
72static inline u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
73{
74 return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
75}
76
77static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
78{
79 return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
80}
81
82
83static u32 gk20a_fecs_trace_fecs_context_ptr(struct channel_gk20a *ch)
84{
85 return (u32) (sg_phys(ch->inst_block.sgt->sgl) >> 12LL);
86}
87
88static inline int gk20a_fecs_trace_num_ts(void)
89{
90 return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
91 - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
92}
93
94struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
95 struct gk20a_fecs_trace *trace, int idx)
96{
97 return (struct gk20a_fecs_trace_record *)
98 ((u8 *) trace->trace_buf.cpu_va
99 + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
100}
101
102static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
103{
104 /*
105 * testing magic_hi should suffice. magic_lo is sometimes used
106 * as a sequence number in experimental ucode.
107 */
108 return (r->magic_hi
109 == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
110}
111
112static int gk20a_fecs_trace_get_read_index(struct gk20a *g)
113{
114 return gr_gk20a_elpg_protected_call(g,
115 gk20a_readl(g, gr_fecs_mailbox1_r()));
116}
117
118static int gk20a_fecs_trace_get_write_index(struct gk20a *g)
119{
120 return gr_gk20a_elpg_protected_call(g,
121 gk20a_readl(g, gr_fecs_mailbox0_r()));
122}
123
124static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
125{
126 gk20a_dbg(gpu_dbg_ctxsw, "set read=%d", index);
127 return gr_gk20a_elpg_protected_call(g,
128 (gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
129}
130
131void gk20a_fecs_trace_hash_dump(struct gk20a *g)
132{
133 u32 bkt;
134 struct gk20a_fecs_trace_hash_ent *ent;
135 struct gk20a_fecs_trace *trace = g->fecs_trace;
136
137 gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table");
138
139 mutex_lock(&trace->hash_lock);
140 hash_for_each(trace->pid_hash_table, bkt, ent, node)
141 {
142 gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
143 ent, bkt, ent->context_ptr, ent->pid);
144
145 }
146 mutex_unlock(&trace->hash_lock);
147}
148
149static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
150{
151 struct gk20a_fecs_trace_hash_ent *he;
152 struct gk20a_fecs_trace *trace = g->fecs_trace;
153
154 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
155 "adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);
156
157 he = kzalloc(sizeof(*he), GFP_KERNEL);
158 if (unlikely(!he)) {
159 gk20a_warn(dev_from_gk20a(g),
160 "can't alloc new hash entry for context_ptr=%x pid=%d",
161 context_ptr, pid);
162 return -ENOMEM;
163 }
164
165 he->context_ptr = context_ptr;
166 he->pid = pid;
167 mutex_lock(&trace->hash_lock);
168 hash_add(trace->pid_hash_table, &he->node, context_ptr);
169 mutex_unlock(&trace->hash_lock);
170 return 0;
171}
172
173static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
174{
175 struct hlist_node *tmp;
176 struct gk20a_fecs_trace_hash_ent *ent;
177 struct gk20a_fecs_trace *trace = g->fecs_trace;
178
179 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
180 "freeing hash entry context_ptr=%x", context_ptr);
181
182 mutex_lock(&trace->hash_lock);
183 hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
184 context_ptr) {
185 if (ent->context_ptr == context_ptr) {
186 hash_del(&ent->node);
187 gk20a_dbg(gpu_dbg_ctxsw,
188 "freed hash entry=%p context_ptr=%x", ent,
189 ent->context_ptr);
190 kfree(ent);
191 break;
192 }
193 }
194 mutex_unlock(&trace->hash_lock);
195}
196
197static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
198{
199 u32 bkt;
200 struct hlist_node *tmp;
201 struct gk20a_fecs_trace_hash_ent *ent;
202 struct gk20a_fecs_trace *trace = g->fecs_trace;
203
204 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);
205
206 mutex_lock(&trace->hash_lock);
207 hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
208 hash_del(&ent->node);
209 kfree(ent);
210 }
211 mutex_unlock(&trace->hash_lock);
212
213}
214
215static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
216{
217 struct gk20a_fecs_trace_hash_ent *ent;
218 struct gk20a_fecs_trace *trace = g->fecs_trace;
219 pid_t pid = 0;
220
221 mutex_lock(&trace->hash_lock);
222 hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
223 if (ent->context_ptr == context_ptr) {
224 gk20a_dbg(gpu_dbg_ctxsw,
225 "found context_ptr=%x -> pid=%d",
226 ent->context_ptr, ent->pid);
227 pid = ent->pid;
228 break;
229 }
230 }
231 mutex_unlock(&trace->hash_lock);
232
233 return pid;
234}
235
236/*
237 * Converts HW entry format to userspace-facing format and pushes it to the
238 * queue.
239 */
240static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
241{
242 int i;
243 struct nvgpu_ctxsw_trace_entry entry = { };
244 struct gk20a_fecs_trace *trace = g->fecs_trace;
245 pid_t cur_pid;
246 pid_t new_pid;
247
248 /* for now, only one VM */
249 const int vmid = 0;
250
251 struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(
252 trace, index);
253
254 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
255 "consuming record trace=%p read=%d record=%p", trace, index, r);
256
257 if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
258 gk20a_warn(dev_from_gk20a(g),
259 "trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
260 trace, index, r, r->magic_lo, r->magic_hi);
261 return -EINVAL;
262 }
263
264 cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
265 new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);
266
267 gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
268 "context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
269 r->context_ptr, cur_pid, r->new_context_ptr, new_pid);
270
271 entry.context_id = r->context_id;
272 entry.vmid = vmid;
273
274 /* insert SOF event if needed */
275 if (!(trace->sof_mask & BIT(vmid))) {
276 entry.tag = NVGPU_CTXSW_TAG_SOF;
277 entry.timestamp = trace->sof;
278 entry.context_id = 0;
279 entry.pid = 0;
280
281 gk20a_dbg(gpu_dbg_ctxsw, "SOF time=%llx", entry.timestamp);
282 gk20a_ctxsw_trace_write(g, &entry);
283 trace->sof_mask |= BIT(vmid);
284 }
285
286 /* break out FECS record into trace events */
287 for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
288
289 entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
290 entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
291 entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
292
293 gk20a_dbg(gpu_dbg_ctxsw,
294 "tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
295 entry.tag, entry.timestamp, r->context_id,
296 r->new_context_id);
297
298 switch (entry.tag) {
299 case NVGPU_CTXSW_TAG_RESTORE_START:
300 case NVGPU_CTXSW_TAG_CONTEXT_START:
301 entry.context_id = r->new_context_id;
302 entry.pid = new_pid;
303 break;
304
305 case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
306 case NVGPU_CTXSW_TAG_FE_ACK:
307 case NVGPU_CTXSW_TAG_FE_ACK_WFI:
308 case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
309 case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
310 case NVGPU_CTXSW_TAG_FE_ACK_CILP:
311 case NVGPU_CTXSW_TAG_SAVE_END:
312 entry.context_id = r->context_id;
313 entry.pid = cur_pid;
314 break;
315
316 default:
317 /* tags are not guaranteed to start at the beginning */
318 WARN_ON(entry.tag && (entry.tag != NVGPU_CTXSW_TAG_INVALID_TIMESTAMP));
319 continue;
320 }
321
322 gk20a_dbg(gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
323 entry.tag, entry.context_id, entry.pid);
324
325 if (!entry.context_id)
326 continue;
327
328 gk20a_ctxsw_trace_write(g, &entry);
329 }
330
331 gk20a_ctxsw_trace_wake_up(g, vmid);
332 return 0;
333}
334
335static int gk20a_fecs_trace_poll(struct gk20a *g)
336{
337 struct gk20a_fecs_trace *trace = g->fecs_trace;
338
339 int read = 0;
340 int write = 0;
341 int cnt;
342 int err;
343
344 err = gk20a_busy(g->dev);
345 if (unlikely(err))
346 return err;
347
348 mutex_lock(&trace->poll_lock);
349 write = gk20a_fecs_trace_get_write_index(g);
350 if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
351 gk20a_err(dev_from_gk20a(g),
352 "failed to acquire write index, write=%d", write);
353 err = write;
354 goto done;
355 }
356
357 read = gk20a_fecs_trace_get_read_index(g);
358
359 cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
360 if (!cnt)
361 goto done;
362
363 gk20a_dbg(gpu_dbg_ctxsw,
364 "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
365 read, gk20a_fecs_trace_get_read_index(g), write, cnt);
366
367 /* we did not send any SOF yet */
368 trace->sof_mask = 0;
369
370 /* consume all records */
371 while (read != write) {
372 gk20a_fecs_trace_ring_read(g, read);
373
374 /* Get to next record. */
375 read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
376 gk20a_fecs_trace_set_read_index(g, read);
377 }
378
379done:
380 /*
381 * OK, we read out all the entries... a new "frame" starts here.
382 * We remember the Start Of Frame time and insert it on the next
383 * iteration.
384 */
385 trace->sof = gk20a_read_ptimer(g);
386
387 mutex_unlock(&trace->poll_lock);
388 gk20a_idle(g->dev);
389 return err;
390}
391
392static int gk20a_fecs_trace_periodic_polling(void *arg)
393{
394 struct gk20a *g = (struct gk20a *)arg;
395 struct timespec ts = ns_to_timespec(GK20A_FECS_TRACE_FRAME_PERIOD_NS);
396
397 pr_info("%s: running\n", __func__);
398
399 while (!kthread_should_stop()) {
400
401 hrtimer_nanosleep(&ts, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
402
403 gk20a_fecs_trace_poll(g);
404 }
405
406 return 0;
407}
408
409static int gk20a_fecs_trace_alloc_ring(struct gk20a *g)
410{
411 struct gk20a_fecs_trace *trace = g->fecs_trace;
412
413 return gk20a_gmmu_alloc(g, GK20A_FECS_TRACE_NUM_RECORDS
414 * ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
415 &trace->trace_buf);
416}
417
418static void gk20a_fecs_trace_free_ring(struct gk20a *g)
419{
420 struct gk20a_fecs_trace *trace = g->fecs_trace;
421
422 gk20a_gmmu_free(g, &trace->trace_buf);
423}
424
425#ifdef CONFIG_DEBUG_FS
426/*
427 * The sequence iterator functions. We simply use the count of the
428 * next line as our internal position.
429 */
430static void *gk20a_fecs_trace_debugfs_ring_seq_start(
431 struct seq_file *s, loff_t *pos)
432{
433 if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
434 return NULL;
435
436 return pos;
437}
438
439static void *gk20a_fecs_trace_debugfs_ring_seq_next(
440 struct seq_file *s, void *v, loff_t *pos)
441{
442 ++(*pos);
443 if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
444 return NULL;
445 return pos;
446}
447
448static void gk20a_fecs_trace_debugfs_ring_seq_stop(
449 struct seq_file *s, void *v)
450{
451}
452
453static int gk20a_fecs_trace_debugfs_ring_seq_show(
454 struct seq_file *s, void *v)
455{
456 loff_t *pos = (loff_t *) v;
457 struct gk20a *g = *(struct gk20a **)s->private;
458 struct gk20a_fecs_trace *trace = g->fecs_trace;
459 struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos);
460 int i;
461 const u32 invalid_tag =
462 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
463 u32 tag;
464 u64 timestamp;
465
466 seq_printf(s, "record #%lld (%p)\n", *pos, r);
467 seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo);
468 seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi);
469 if (gk20a_fecs_trace_is_valid_record(r)) {
470 seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr);
471 seq_printf(s, "\tcontext_id=%08x\n", r->context_id);
472 seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr);
473 seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id);
474 for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
475 tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
476 if (tag == invalid_tag)
477 continue;
478 timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
479 timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
480 seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp);
481 }
482 }
483 return 0;
484}
485
486/*
487 * Tie them all together into a set of seq_operations.
488 */
489const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = {
490 .start = gk20a_fecs_trace_debugfs_ring_seq_start,
491 .next = gk20a_fecs_trace_debugfs_ring_seq_next,
492 .stop = gk20a_fecs_trace_debugfs_ring_seq_stop,
493 .show = gk20a_fecs_trace_debugfs_ring_seq_show
494};
495
496/*
497 * Time to set up the file operations for our /proc file. In this case,
498 * all we need is an open function which sets up the sequence ops.
499 */
500
501static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode,
502 struct file *file)
503{
504 struct gk20a **p;
505
506 if (!capable(CAP_SYS_ADMIN))
507 return -EPERM;
508
509 p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops,
510 sizeof(struct gk20a *));
511 if (!p)
512 return -ENOMEM;
513
514 *p = (struct gk20a *)inode->i_private;
515 return 0;
516};
517
518/*
519 * The file operations structure contains our open function along with
520 * set of the canned seq_ ops.
521 */
522const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = {
523 .owner = THIS_MODULE,
524 .open = gk20a_ctxsw_debugfs_ring_open,
525 .read = seq_read,
526 .llseek = seq_lseek,
527 .release = seq_release_private
528};
529
530static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val)
531{
532 *val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg);
533 return 0;
534}
535DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops,
536 gk20a_fecs_trace_debugfs_read, NULL, "%llu\n");
537
538static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val)
539{
540 *val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg);
541 return 0;
542}
543DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops,
544 gk20a_fecs_trace_debugfs_write, NULL, "%llu\n");
545
546static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
547{
548 struct gk20a_platform *plat = platform_get_drvdata(g->dev);
549
550 debugfs_create_file("ctxsw_trace_read", 0600, plat->debugfs, g,
551 &gk20a_fecs_trace_debugfs_read_fops);
552 debugfs_create_file("ctxsw_trace_write", 0600, plat->debugfs, g,
553 &gk20a_fecs_trace_debugfs_write_fops);
554 debugfs_create_file("ctxsw_trace_ring", 0600, plat->debugfs, g,
555 &gk20a_fecs_trace_debugfs_ring_fops);
556}
557
558static void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g)
559{
560 struct gk20a_platform *plat = platform_get_drvdata(g->dev);
561
562 debugfs_remove_recursive(plat->debugfs);
563}
564
565#else
566
567static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
568{
569}
570
571static inline void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g)
572{
573}
574
575#endif /* CONFIG_DEBUG_FS */
576
577static int gk20a_fecs_trace_init(struct gk20a *g)
578{
579 struct gk20a_fecs_trace *trace;
580 int err;
581
582 trace = kzalloc(sizeof(struct gk20a_fecs_trace), GFP_KERNEL);
583 if (!trace) {
584 gk20a_warn(dev_from_gk20a(g), "failed to allocate fecs_trace");
585 return -ENOMEM;
586 }
587 g->fecs_trace = trace;
588
589 BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
590 err = gk20a_fecs_trace_alloc_ring(g);
591 if (err) {
592 gk20a_warn(dev_from_gk20a(g), "failed to allocate FECS ring");
593 goto clean;
594 }
595
596 mutex_init(&trace->poll_lock);
597 mutex_init(&trace->hash_lock);
598 hash_init(trace->pid_hash_table);
599
600 gk20a_fecs_trace_debugfs_init(g);
601 return 0;
602
603clean:
604 kfree(trace);
605 g->fecs_trace = NULL;
606 return err;
607}
608
609static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
610 struct channel_gk20a *ch)
611{
612 /*
613 * map our circ_buf to the context space and store the GPU VA
614 * in the context header.
615 */
616
617 u32 lo;
618 u32 hi;
619 phys_addr_t pa;
620 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
621 struct gk20a_fecs_trace *trace = g->fecs_trace;
622 void *ctx_ptr;
623 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);
624
625 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
626 "hw_chid=%d context_ptr=%x inst_block=%llx",
627 ch->hw_chid, context_ptr, gk20a_mem_phys(&ch->inst_block));
628
629 if (!trace)
630 return -ENOMEM;
631
632 pa = gk20a_mem_phys(&trace->trace_buf);
633 if (!pa)
634 return -ENOMEM;
635
636 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
637 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0,
638 pgprot_writecombine(PAGE_KERNEL));
639 if (!ctx_ptr)
640 return -ENOMEM;
641
642 lo = u64_lo32(pa);
643 hi = u64_hi32(pa);
644
645 gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
646 lo, GK20A_FECS_TRACE_NUM_RECORDS);
647
648 gk20a_mem_wr32(ctx_ptr
649 + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
650 0, lo);
651 gk20a_mem_wr32(ctx_ptr
652 + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
653 0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi));
654 gk20a_mem_wr32(ctx_ptr
655 + ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
656 0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
657 GK20A_FECS_TRACE_NUM_RECORDS));
658
659 vunmap(ctx_ptr);
660 gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid);
661
662 return 0;
663}
664
665static int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
666{
667 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);
668
669 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
670 "ch=%p context_ptr=%x", ch, context_ptr);
671
672 if (g->ops.fecs_trace.flush)
673 g->ops.fecs_trace.flush(g);
674 gk20a_fecs_trace_poll(g);
675 gk20a_fecs_trace_hash_del(g, context_ptr);
676 return 0;
677}
678
679static int gk20a_fecs_trace_reset(struct gk20a *g)
680{
681 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
682
683 if (g->ops.fecs_trace.flush)
684 g->ops.fecs_trace.flush(g);
685 gk20a_fecs_trace_poll(g);
686 return gk20a_fecs_trace_set_read_index(g, 0);
687}
688
689static int gk20a_fecs_trace_deinit(struct gk20a *g)
690{
691 struct gk20a_fecs_trace *trace = g->fecs_trace;
692
693 gk20a_fecs_trace_debugfs_cleanup(g);
694 kthread_stop(trace->poll_task);
695 gk20a_fecs_trace_free_ring(g);
696 gk20a_fecs_trace_free_hash_table(g);
697
698 kfree(g->fecs_trace);
699 g->fecs_trace = NULL;
700 return 0;
701}
702
703static int gk20a_gr_max_entries(struct gk20a *g,
704 struct nvgpu_ctxsw_trace_filter *filter)
705{
706 int n;
707 int tag;
708
709 /* Compute number of entries per record, with given filter */
710 for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
711 n += (NVGPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
712
713 /* Return max number of entries generated for the whole ring */
714 return n * GK20A_FECS_TRACE_NUM_RECORDS;
715}
716
717static int gk20a_fecs_trace_enable(struct gk20a *g)
718{
719 struct gk20a_fecs_trace *trace = g->fecs_trace;
720 struct task_struct *task;
721
722 if (!trace->poll_task) {
723 task = kthread_run(gk20a_fecs_trace_periodic_polling, g, __func__);
724 if (unlikely(IS_ERR(task))) {
725 gk20a_warn(dev_from_gk20a(g), "failed to create FECS polling task");
726 return PTR_ERR(task);
727 }
728 trace->poll_task = task;
729 }
730
731 return 0;
732}
733
734static int gk20a_fecs_trace_disable(struct gk20a *g)
735{
736 struct gk20a_fecs_trace *trace = g->fecs_trace;
737
738 if (trace->poll_task) {
739 kthread_stop(trace->poll_task);
740 trace->poll_task = NULL;
741 }
742
743 return -EPERM;
744}
745
746void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
747{
748 ops->fecs_trace.init = gk20a_fecs_trace_init;
749 ops->fecs_trace.deinit = gk20a_fecs_trace_deinit;
750 ops->fecs_trace.enable = gk20a_fecs_trace_enable;
751 ops->fecs_trace.disable = gk20a_fecs_trace_disable;
752 ops->fecs_trace.reset = gk20a_fecs_trace_reset;
753 ops->fecs_trace.flush = NULL;
754 ops->fecs_trace.poll = gk20a_fecs_trace_poll;
755 ops->fecs_trace.bind_channel = gk20a_fecs_trace_bind_channel;
756 ops->fecs_trace.unbind_channel = gk20a_fecs_trace_unbind_channel;
757 ops->fecs_trace.max_entries = gk20a_gr_max_entries;
758}
759#else
760void gk20a_init_fecs_trace_ops(struct gpu_ops *ops)
761{
762}
763#endif /* CONFIG_GK20A_CTXSW_TRACE */
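For reference, each 64-bit ts[] word in a FECS record packs an 8-bit event
tag in bits 63:56 and a PTIMER sample, pre-shifted right by
GK20A_FECS_TRACE_PTIMER_SHIFT, in the low 56 bits;
gk20a_fecs_trace_record_ts_tag_v() and ..._timestamp_v() above undo that
packing. A minimal standalone sketch with a made-up record word:

#include <stdint.h>
#include <stdio.h>

#define PTIMER_SHIFT 5	/* GK20A_FECS_TRACE_PTIMER_SHIFT */

int main(void)
{
	uint64_t ts = 0x0500000012345678ULL;	/* hypothetical ts[] word */
	uint32_t tag = (uint32_t)(ts >> 56);	/* tag lives in bits 63:56 */
	uint64_t stamp = (ts & ((1ULL << 56) - 1)) << PTIMER_SHIFT;

	/* FECS stores PTIMER >> 5, so the driver shifts it back up;
	 * the low 5 bits of the reconstructed timestamp are always 0 */
	printf("tag=%02x timestamp=%012llx\n", (unsigned)tag,
	       (unsigned long long)stamp);
	return 0;
}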
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
new file mode 100644
index 00000000..4979d6c6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#ifndef __FECS_TRACE_GK20A_H
15#define __FECS_TRACE_GK20A_H
16
17struct gpu_ops;
18void gk20a_init_fecs_trace_ops(struct gpu_ops *ops);
19
20#endif /* __FECS_TRACE_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 661c2c38..029a713f 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -25,6 +25,7 @@
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
+#include "ctxsw_trace_gk20a.h"
 #include "semaphore_gk20a.h"
 #include "hw_fifo_gk20a.h"
 #include "hw_pbdma_gk20a.h"
@@ -776,13 +777,17 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
 	if (engine_id == top_device_info_type_enum_graphics_v()) {
 		if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
 			gk20a_pmu_disable_elpg(g);
 		/*HALT_PIPELINE method, halt GR engine*/
 		if (gr_gk20a_halt_pipe(g))
-			gk20a_err(dev_from_gk20a(g),
-				"failed to HALT gr pipe");
+			gk20a_err(dev_from_gk20a(g), "failed to HALT gr pipe");
+		/* resetting engine will alter read/write index.
+		 * need to flush circular buffer before re-enabling FECS.
+		 */
+		if (g->ops.fecs_trace.reset)
+			g->ops.fecs_trace.reset(g);
 		/* resetting engine using mc_enable_r() is not
 		   enough, we do full init sequence */
 		gk20a_gr_reset(g);
 		if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
 			gk20a_pmu_enable_elpg(g);
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0cc9564b..735bf90b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -60,6 +60,7 @@
 #include "hw_gr_gk20a.h"
 #include "hw_fb_gk20a.h"
 #include "gk20a_scale.h"
+#include "ctxsw_trace_gk20a.h"
 #include "dbg_gpu_gk20a.h"
 #include "gk20a_allocator.h"
 #include "hal.h"
@@ -80,7 +81,7 @@
 /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
 #define INTERFACE_NAME "nvhost%s-gpu"
 
-#define GK20A_NUM_CDEVS 6
+#define GK20A_NUM_CDEVS 7
 
 #define EMC3D_DEFAULT_RATIO 750
 
@@ -169,6 +170,19 @@ static const struct file_operations gk20a_tsg_ops = {
 	.unlocked_ioctl = gk20a_tsg_dev_ioctl,
 };
 
+static const struct file_operations gk20a_ctxsw_ops = {
+	.owner = THIS_MODULE,
+	.release = gk20a_ctxsw_dev_release,
+	.open = gk20a_ctxsw_dev_open,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = gk20a_ctxsw_dev_ioctl,
+#endif
+	.unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
+	.poll = gk20a_ctxsw_dev_poll,
+	.read = gk20a_ctxsw_dev_read,
+	.mmap = gk20a_ctxsw_dev_mmap,
+};
+
 static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
 {
 	writel(v, g->sim.regs+r);
@@ -881,6 +895,10 @@ static int gk20a_pm_finalize_poweron(struct device *dev)
 		goto done;
 	}
 
+	err = gk20a_ctxsw_trace_init(g);
+	if (err)
+		gk20a_warn(dev, "could not initialize ctxsw tracing");
+
 	/* Restore the debug setting */
 	g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl);
 
@@ -1009,6 +1027,11 @@ void gk20a_user_deinit(struct platform_device *dev)
 		cdev_del(&g->tsg.cdev);
 	}
 
+	if (g->ctxsw.node) {
+		device_destroy(g->class, g->ctxsw.cdev.dev);
+		cdev_del(&g->ctxsw.cdev);
+	}
+
 	if (g->cdev_region)
 		unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);
 
@@ -1074,6 +1097,15 @@ int gk20a_user_init(struct platform_device *dev)
 	if (err)
 		goto fail;
 
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+	err = gk20a_create_device(dev, devno++, "-ctxsw",
+				  &g->ctxsw.cdev, &g->ctxsw.node,
+				  &gk20a_ctxsw_ops);
+	if (err)
+		goto fail;
+#endif
+
+
 	return 0;
 fail:
 	gk20a_user_deinit(dev);
@@ -1554,6 +1586,8 @@ static int __exit gk20a_remove(struct platform_device *dev)
 	if (platform->has_cde)
 		gk20a_cde_destroy(g);
 
+	gk20a_ctxsw_trace_cleanup(g);
+
 	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
 		gk20a_scale_exit(dev);
 
@@ -2091,6 +2125,19 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name)
 	return fw;
 }
 
+
+u64 gk20a_read_ptimer(struct gk20a *g)
+{
+	u32 time_hi0 = gk20a_readl(g, timer_time_1_r());
+	u32 time_lo = gk20a_readl(g, timer_time_0_r());
+	u32 time_hi1 = gk20a_readl(g, timer_time_1_r());
+	u32 time_hi = (time_lo & (1L << 31)) ? time_hi0 : time_hi1;
+	u64 time = ((u64)time_hi << 32) | time_lo;
+
+	return time;
+}
+
+
 MODULE_LICENSE("GPL v2");
 module_init(gk20a_init);
 module_exit(gk20a_exit);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8b87c7aa..541e7b50 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -25,6 +25,8 @@ struct channel_gk20a;
 struct gr_gk20a;
 struct sim_gk20a;
 struct gk20a_ctxsw_ucode_segments;
+struct gk20a_fecs_trace;
+struct gk20a_ctxsw_trace;
 struct acr_gm20b;
 
 #include <linux/sched.h>
@@ -373,6 +375,19 @@ struct gpu_ops {
 		bool use_dma_for_fw_bootstrap;
 	} gr_ctx;
 	struct {
+		int (*init)(struct gk20a *g);
+		int (*max_entries)(struct gk20a *,
+			struct nvgpu_ctxsw_trace_filter *);
+		int (*flush)(struct gk20a *g);
+		int (*poll)(struct gk20a *g);
+		int (*enable)(struct gk20a *g);
+		int (*disable)(struct gk20a *g);
+		int (*reset)(struct gk20a *g);
+		int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
+		int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
+		int (*deinit)(struct gk20a *g);
+	} fecs_trace;
+	struct {
 		bool (*support_sparse)(struct gk20a *g);
 		bool (*is_debug_mode_enabled)(struct gk20a *g);
 		void (*set_debug_mode)(struct gk20a *g, bool enable);
@@ -613,6 +628,11 @@ struct gk20a {
 		struct device *node;
 	} tsg;
 
+	struct {
+		struct cdev cdev;
+		struct device *node;
+	} ctxsw;
+
 	struct mutex client_lock;
 	int client_refcount; /* open channels and ctrl nodes */
 
@@ -639,6 +659,9 @@ struct gk20a {
 
 	struct gk20a_scale_profile *scale_profile;
 
+	struct gk20a_ctxsw_trace *ctxsw_trace;
+	struct gk20a_fecs_trace *fecs_trace;
+
 	struct device_dma_parameters dma_parms;
 
 	struct gk20a_cde_app cde_app;
@@ -716,6 +739,7 @@ enum gk20a_dbg_categories {
 	gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */
 	gpu_dbg_cde = BIT(10), /* cde info messages */
 	gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */
+	gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */
 	gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
 };
 
@@ -962,4 +986,6 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
 	else
 		return (timeout * 10) / scale10x;
 }
+
+u64 gk20a_read_ptimer(struct gk20a *g);
 #endif /* GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 60bba0b8..08f1d921 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -56,6 +56,7 @@
 #include "debug_gk20a.h"
 #include "semaphore_gk20a.h"
 #include "platform_gk20a.h"
+#include "ctxsw_trace_gk20a.h"
 
 #define BLK_SIZE (256)
 
@@ -2855,6 +2856,13 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 				"fail to load golden ctx image");
 			goto out;
 		}
+		if (g->ops.fecs_trace.bind_channel) {
+			err = g->ops.fecs_trace.bind_channel(g, c);
+			if (err) {
+				gk20a_warn(dev_from_gk20a(g),
+					"fail to bind channel for ctxsw trace");
+			}
+		}
 		c->first_init = true;
 	}
 
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index a9ad970a..9718aad2 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -22,6 +22,7 @@
 #include "gk20a_gating_reglist.h"
 #include "channel_gk20a.h"
 #include "gr_ctx_gk20a.h"
+#include "fecs_trace_gk20a.h"
 #include "mm_gk20a.h"
 #include "mc_gk20a.h"
 #include "pmu_gk20a.h"
@@ -57,6 +58,7 @@ int gk20a_init_hal(struct gk20a *g)
 	gk20a_init_mc(gops);
 	gk20a_init_ltc(gops);
 	gk20a_init_gr_ops(gops);
+	gk20a_init_fecs_trace_ops(gops);
 	gk20a_init_fb(gops);
 	gk20a_init_fifo(gops);
 	gk20a_init_ce2(gops);
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
index 39cbbb58..da555f7c 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2012-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -246,4 +246,192 @@ static inline u32 ctxsw_prog_main_image_context_id_o(void)
 {
 	return 0x000000f0;
 }
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_o(void)
+{
+	return 0x000000ac;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(void)
+{
+	return 0x000000b0;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_v_m(void)
+{
+	return 0xfffffff << 0;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_m(void)
+{
+	return 0x3 << 28;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(void)
+{
+	return 0x30000000;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(void)
+{
+	return 0x000000b4;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 ctxsw_prog_record_timestamp_record_size_in_bytes_v(void)
+{
+	return 0x00000080;
+}
+static inline u32 ctxsw_prog_record_timestamp_record_size_in_words_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_o(void)
+{
+	return 0x00000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_v_value_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_o(void)
+{
+	return 0x00000004;
+}
+static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_v_value_v(void)
+{
+	return 0x600dbeef;
+}
+static inline u32 ctxsw_prog_record_timestamp_context_id_o(void)
+{
+	return 0x00000008;
+}
+static inline u32 ctxsw_prog_record_timestamp_context_ptr_o(void)
+{
+	return 0x0000000c;
+}
+static inline u32 ctxsw_prog_record_timestamp_new_context_id_o(void)
+{
+	return 0x00000010;
+}
+static inline u32 ctxsw_prog_record_timestamp_new_context_ptr_o(void)
+{
+	return 0x00000014;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_lo_o(void)
+{
+	return 0x00000018;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_o(void)
+{
+	return 0x0000001c;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_v(u32 r)
+{
+	return (r >> 0) & 0xffffff;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_m(void)
+{
+	return 0xff << 24;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_v(u32 r)
+{
+	return (r >> 24) & 0xff;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_v(void)
+{
+	return 0x0000000a;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_f(void)
+{
+	return 0xa000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_v(void)
+{
+	return 0x0000000b;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_f(void)
+{
+	return 0xb000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_f(void)
+{
+	return 0xc000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_v(void)
+{
+	return 0x0000000d;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_f(void)
+{
+	return 0xd000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_f(void)
+{
+	return 0x3000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_f(void)
+{
+	return 0x4000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_v(void)
+{
+	return 0x00000005;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_f(void)
+{
+	return 0x5000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(void)
+{
+	return 0x000000ff;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_f(void)
+{
+	return 0xff000000;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h
index dbbc914f..4cb36cbe 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2013-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,