Diffstat (limited to 'include/os/linux/ctxsw_trace.c')
-rw-r--r--	include/os/linux/ctxsw_trace.c	792
1 file changed, 792 insertions, 0 deletions
diff --git a/include/os/linux/ctxsw_trace.c b/include/os/linux/ctxsw_trace.c
new file mode 100644
index 0000000..2d36d9c
--- /dev/null
+++ b/include/os/linux/ctxsw_trace.c
@@ -0,0 +1,792 @@
/*
 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/barrier.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>

#include "gk20a/gr_gk20a.h"
#include "gk20a/fecs_trace_gk20a.h"

#include "platform_gk20a.h"
#include "os_linux.h"
#include "ctxsw_trace.h"

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE	(128*PAGE_SIZE)

/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;

	struct nvgpu_ctxsw_ring_header *hdr;
	struct nvgpu_gpu_ctxsw_trace_entry *ents;
	struct nvgpu_gpu_ctxsw_trace_filter filter;
	bool write_enabled;
	struct nvgpu_cond readout_wq;
	size_t size;
	u32 num_ents;

	nvgpu_atomic_t vma_ref;

	struct nvgpu_mutex write_lock;
};


struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};

static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx == hdr->read_idx);
}

static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}

static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}

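/*
 * Worked example of the index arithmetic above (values are illustrative
 * only): with num_ents = 4, read_idx = 1 and write_idx = 3, ring_len()
 * is (3 - 1) % 4 = 2 and ring_is_full() is false since (3 + 1) % 4 = 0
 * != 1; once write_idx wraps around to 0 (read_idx still 1) the ring
 * reports full, i.e. one slot is always left unused so that the full
 * and empty states can be told apart.
 */
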
static void nvgpu_set_ctxsw_trace_entry(struct nvgpu_ctxsw_trace_entry *entry_dst,
				struct nvgpu_gpu_ctxsw_trace_entry *entry_src)
{
	entry_dst->tag = entry_src->tag;
	entry_dst->vmid = entry_src->vmid;
	entry_dst->seqno = entry_src->seqno;
	entry_dst->context_id = entry_src->context_id;
	entry_dst->pid = entry_src->pid;
	entry_dst->timestamp = entry_src->timestamp;
}

ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry *) buf;
	struct nvgpu_ctxsw_trace_entry user_entry;
	size_t copied = 0;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	nvgpu_mutex_acquire(&dev->write_lock);
	while (ring_is_empty(hdr)) {
		nvgpu_mutex_release(&dev->write_lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
			!ring_is_empty(hdr), 0);
		if (err)
			return err;
		nvgpu_mutex_acquire(&dev->write_lock);
	}

	while (size >= sizeof(struct nvgpu_gpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		nvgpu_set_ctxsw_trace_entry(&user_entry, &dev->ents[hdr->read_idx]);
		if (copy_to_user(entry, &user_entry,
			sizeof(*entry))) {
			nvgpu_mutex_release(&dev->write_lock);
			return -EFAULT;
		}

		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	*off = hdr->read_idx;
	nvgpu_mutex_release(&dev->write_lock);

	return copied;
}

static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = true;
	nvgpu_mutex_release(&dev->write_lock);
	dev->g->ops.fecs_trace.enable(dev->g);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
	dev->g->ops.fecs_trace.disable(dev->g);
	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);
	return 0;
}

static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
					size_t size)
{
	struct gk20a *g = dev->g;
	void *buf;
	int err;

	if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
		return -EBUSY;

	err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
	if (err)
		return err;


	dev->hdr = buf;
	dev->ents = (struct nvgpu_gpu_ctxsw_trace_entry *) (dev->hdr + 1);
	dev->size = size;
	dev->num_ents = dev->hdr->num_ents;

	nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
		dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
	return 0;
}

int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
		void **buf, size_t *size)
{
	struct nvgpu_ctxsw_ring_header *hdr;

	*size = roundup(*size, PAGE_SIZE);
	hdr = vmalloc_user(*size);
	if (!hdr)
		return -ENOMEM;

	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
	hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
		/ sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
	hdr->ent_size = sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
	hdr->drop_count = 0;
	hdr->read_idx = 0;
	hdr->write_idx = 0;
	hdr->write_seqno = 0;

	*buf = hdr;
	return 0;
}

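/*
 * Layout of the buffer allocated above (sketch, inferred from the header
 * initialisation in gk20a_ctxsw_dev_ring_alloc() and from
 * "dev->ents = ... (dev->hdr + 1)" in gk20a_ctxsw_dev_alloc_buffer()):
 *
 *	+-----------------------------------+  offset 0
 *	| struct nvgpu_ctxsw_ring_header    |
 *	+-----------------------------------+  sizeof(ring header)
 *	| nvgpu_gpu_ctxsw_trace_entry[0]    |
 *	| nvgpu_gpu_ctxsw_trace_entry[1]    |
 *	| ...                               |
 *	| nvgpu_gpu_ctxsw_trace_entry[n-1]  |
 *	+-----------------------------------+  *size (rounded up to PAGE_SIZE)
 *
 * where n = num_ents is however many entries fit after the header.
 */
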
int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
{
	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];

	nvgpu_vfree(g, dev->hdr);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_ring_setup_args *args)
{
	struct gk20a *g = dev->g;
	size_t size = args->size;
	int ret;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);

	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
		return -EINVAL;

	nvgpu_mutex_acquire(&dev->write_lock);
	ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	nvgpu_mutex_release(&dev->write_lock);

	return ret;
}

static void nvgpu_set_ctxsw_trace_filter_args(struct nvgpu_gpu_ctxsw_trace_filter *filter_dst,
	struct nvgpu_ctxsw_trace_filter *filter_src)
{
	memcpy(filter_dst->tag_bits, filter_src->tag_bits, (NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
}

static void nvgpu_get_ctxsw_trace_filter_args(struct nvgpu_ctxsw_trace_filter *filter_dst,
	struct nvgpu_gpu_ctxsw_trace_filter *filter_src)
{
	memcpy(filter_dst->tag_bits, filter_src->tag_bits, (NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
}

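/*
 * The two helpers above copy the tag_bits bitmask between the uapi filter
 * type (struct nvgpu_ctxsw_trace_filter) and the common nvgpu filter type
 * (struct nvgpu_gpu_ctxsw_trace_filter). The bitmask is indexed by ctxsw
 * tag and, judging by the symmetric copy in both directions, the two
 * structs appear to share the same tag_bits layout.
 */
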
static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	struct gk20a *g = dev->g;

	nvgpu_mutex_acquire(&dev->write_lock);
	nvgpu_set_ctxsw_trace_filter_args(&dev->filter, &args->filter);
	nvgpu_mutex_release(&dev->write_lock);

	if (g->ops.fecs_trace.set_filter)
		g->ops.fecs_trace.set_filter(g, &dev->filter);
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	nvgpu_mutex_acquire(&dev->write_lock);
	nvgpu_get_ctxsw_trace_filter_args(&args->filter, &dev->filter);
	nvgpu_mutex_release(&dev->write_lock);

	return 0;
}

static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	err = gk20a_busy(g);
	if (err)
		return err;

	if (g->ops.fecs_trace.flush)
		err = g->ops.fecs_trace.flush(g);

	if (likely(!err))
		err = g->ops.fecs_trace.poll(g);

	gk20a_idle(g);
	return err;
}

int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l;
	struct gk20a *g;
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int err;
	size_t size;
	u32 n;

	/* only one VM for now */
	const int vmid = 0;

	l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
	g = gk20a_get(&l->g);
	if (!g)
		return -ENODEV;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);

	err = gk20a_busy(g);
	if (err)
		goto free_ref;

	trace = g->ctxsw_trace;
	if (!trace) {
		err = -ENODEV;
		goto idle;
	}

	/* Allow only one user for this device */
	dev = &trace->devs[vmid];
	nvgpu_mutex_acquire(&dev->write_lock);
	if (dev->hdr) {
		err = -EBUSY;
		goto done;
	}

	/* By default, allocate ring buffer big enough to accommodate
	 * FECS records with default event filter */

	/* enable all traces by default */
	NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);

	/* compute max number of entries generated with this filter */
	n = g->ops.fecs_trace.max_entries(g, &dev->filter);

	size = sizeof(struct nvgpu_ctxsw_ring_header) +
			n * sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
	nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
		size, n, sizeof(struct nvgpu_gpu_ctxsw_trace_entry));

	err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	if (!err) {
		filp->private_data = dev;
		nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
			filp, dev, size);
	}

done:
	nvgpu_mutex_release(&dev->write_lock);

idle:
	gk20a_idle(g);
free_ref:
	if (err)
		gk20a_put(g);
	return err;
}

int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);

	g->ops.fecs_trace.disable(g);

	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);

	if (dev->hdr) {
		dev->g->ops.fecs_trace.free_user_buffer(dev->g);
		dev->hdr = NULL;
	}
	gk20a_put(g);
	return 0;
}

long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
	unsigned long arg)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	switch (cmd) {
	case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_RING_SETUP:
		err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
			(struct nvgpu_ctxsw_ring_setup_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_SET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_GET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_POLL:
		err = gk20a_ctxsw_dev_ioctl_poll(dev);
		break;
	default:
		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
			cmd);
		err = -ENOTTY;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));

	return err;
}

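/*
 * Rough sketch of the expected userspace sequence for this interface
 * (the device node name, open flags and argument setup are outside the
 * scope of this file and only illustrative here):
 *
 *	fd = open("<ctxsw trace device node>", ...);
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_RING_SETUP, &ring_setup_args);  (optional: resize ring)
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_SET_FILTER, &filter_args);      (optional: restrict tags)
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE);
 *	... poll()/read() on fd, or mmap() the ring and consume entries directly ...
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_POLL);                          (force a FECS flush/poll)
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_DISABLE);
 *	close(fd);
 */
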
unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	unsigned int mask = 0;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	nvgpu_mutex_acquire(&dev->write_lock);
	poll_wait(filp, &dev->readout_wq.wq, wait);
	if (!ring_is_empty(hdr))
		mask |= POLLIN | POLLRDNORM;
	nvgpu_mutex_release(&dev->write_lock);

	return mask;
}

static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
	struct gk20a *g = dev->g;

	nvgpu_atomic_inc(&dev->vma_ref);
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
	struct gk20a *g = dev->g;

	nvgpu_atomic_dec(&dev->vma_ref);
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	.open = gk20a_ctxsw_dev_vma_open,
	.close = gk20a_ctxsw_dev_vma_close,
};

int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
				struct vm_area_struct *vma)
{
	return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
}

int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	int ret;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
		vma->vm_start, vma->vm_end);

	ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
	if (likely(!ret)) {
		vma->vm_private_data = dev;
		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
		vma->vm_ops->open(vma);
	}

	return ret;
}

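/*
 * Note, inferred from the code above: each mapping takes a vma_ref via
 * gk20a_ctxsw_dev_vma_open(), and gk20a_ctxsw_dev_alloc_buffer() returns
 * -EBUSY while vma_ref is non-zero or tracing is write-enabled, so a
 * RING_SETUP ioctl cannot reallocate the ring out from under an active
 * mapping.
 */
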
#ifdef CONFIG_GK20A_CTXSW_TRACE
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	struct gk20a_ctxsw_dev *dev = trace->devs;
	int err;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		nvgpu_cond_init(&dev->readout_wq);
		err = nvgpu_mutex_init(&dev->write_lock);
		if (err)
			return err;
		nvgpu_atomic_set(&dev->vma_ref, 0);
		dev++;
	}
	return 0;
}
#endif

int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* if tracing is not supported, skip this */
	if (!g->ops.fecs_trace.init)
		return 0;

	if (likely(trace)) {
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
		return 0;
	}

	trace = nvgpu_kzalloc(g, sizeof(*trace));
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
	nvgpu_kfree(g, trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}

void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int i;

	if (!g->ctxsw_trace)
		return;

	trace = g->ctxsw_trace;
	dev = trace->devs;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		nvgpu_mutex_destroy(&dev->write_lock);
		dev++;
	}

	nvgpu_kfree(g, g->ctxsw_trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}

int gk20a_ctxsw_trace_write(struct gk20a *g,
		struct nvgpu_gpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;
	u32 write_idx;

	if (!g->ctxsw_trace)
		return 0;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	nvgpu_mutex_acquire(&dev->write_lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	write_idx = hdr->write_idx;
	if (write_idx >= dev->num_ents) {
		nvgpu_err(dev->g,
			"write_idx=%u out of range [0..%u]",
			write_idx, dev->num_ents);
		ret = -ENOSPC;
		reason = "write_idx out of range";
		goto disable;
	}

	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_GPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	nvgpu_log(g, gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[write_idx] = *entry;

	/* ensure record is written before updating write index */
	nvgpu_smp_wmb();

	write_idx++;
	if (unlikely(write_idx >= hdr->num_ents))
		write_idx = 0;
	hdr->write_idx = write_idx;
	nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	nvgpu_mutex_release(&dev->write_lock);
	return ret;

disable:
	g->ops.fecs_trace.disable(g);

drop:
	hdr->drop_count++;

filter:
	nvgpu_log(g, gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	nvgpu_mutex_release(&dev->write_lock);
	return ret;
}

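/*
 * Informal ordering note: the writer above fills dev->ents[write_idx]
 * first and only advances hdr->write_idx after nvgpu_smp_wmb(). The
 * in-kernel read path (gk20a_ctxsw_dev_read) is already serialised
 * against this writer by dev->write_lock; the explicit write barrier
 * matters mainly for a consumer that mmap()s the ring via
 * gk20a_ctxsw_dev_mmap() and walks hdr/entries directly, which is
 * presumably expected to read write_idx before the entries it covers,
 * with a matching read barrier on its side.
 */
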
void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
{
	struct gk20a_ctxsw_dev *dev;

	if (!g->ctxsw_trace)
		return;

	dev = &g->ctxsw_trace->devs[vmid];
	nvgpu_cond_signal_interruptible(&dev->readout_wq);
}

void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_gpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = ch->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.ptimer.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(ch->chid, ch->tsgid);
}

void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_gpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = tsg->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.ptimer.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(~0, tsg->tsgid);
}

/*
 * Convert Linux nvgpu ctxsw tags of the form NVGPU_CTXSW_TAG_*
 * into common nvgpu ctxsw tags of the form NVGPU_GPU_CTXSW_TAG_*.
 */

u8 nvgpu_gpu_ctxsw_tags_to_common_tags(u8 tags)
{
	switch (tags) {
	case NVGPU_CTXSW_TAG_SOF:
		return NVGPU_GPU_CTXSW_TAG_SOF;
	case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
		return NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST;
	case NVGPU_CTXSW_TAG_FE_ACK:
		return NVGPU_GPU_CTXSW_TAG_FE_ACK;
	case NVGPU_CTXSW_TAG_FE_ACK_WFI:
		return NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI;
	case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
		return NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP;
	case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
		return NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP;
	case NVGPU_CTXSW_TAG_FE_ACK_CILP:
		return NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP;
	case NVGPU_CTXSW_TAG_SAVE_END:
		return NVGPU_GPU_CTXSW_TAG_SAVE_END;
	case NVGPU_CTXSW_TAG_RESTORE_START:
		return NVGPU_GPU_CTXSW_TAG_RESTORE_START;
	case NVGPU_CTXSW_TAG_CONTEXT_START:
		return NVGPU_GPU_CTXSW_TAG_CONTEXT_START;
	case NVGPU_CTXSW_TAG_ENGINE_RESET:
		return NVGPU_GPU_CTXSW_TAG_ENGINE_RESET;
	case NVGPU_CTXSW_TAG_INVALID_TIMESTAMP:
		return NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP;
	}

	WARN_ON(1);
	return tags;
}