Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c  586
1 file changed, 586 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
new file mode 100644
index 00000000..9e7c04ad
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -0,0 +1,586 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <asm/barrier.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/nvgpu.h>
#include <linux/hashtable.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <linux/poll.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <uapi/linux/nvgpu.h>
#include "ctxsw_trace_gk20a.h"
#include "gk20a.h"
#include "gr_gk20a.h"
#include "hw_ctxsw_prog_gk20a.h"
#include "hw_gr_gk20a.h"

#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE	(128*PAGE_SIZE)

/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;

	struct nvgpu_ctxsw_ring_header *hdr;
	struct nvgpu_ctxsw_trace_entry *ents;
	struct nvgpu_ctxsw_trace_filter filter;
	bool write_enabled;
	wait_queue_head_t readout_wq;
	size_t size;

	atomic_t vma_ref;

	struct mutex lock;
};


struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};

static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx == hdr->read_idx);
}

static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}

static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}

static inline int ring_space(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->read_idx - hdr->write_idx - 1) % hdr->num_ents;
}
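
/*
 * Worked example of the index arithmetic above (illustrative, assuming
 * the header index fields are unsigned as in the UAPI header): with
 * num_ents = 8, read_idx = 2 and write_idx = 6, ring_len() = 4 entries
 * are pending and ring_space() = 3 slots are free.  One slot is always
 * left unused so that a full ring (write_idx + 1 == read_idx, modulo
 * num_ents) can be told apart from an empty one (write_idx == read_idx).
 * This relies on the reader and writer below keeping both indices in
 * [0, num_ents).
 */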

ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry __user *) buf;
	size_t copied = 0;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	mutex_lock(&dev->lock);
	while (ring_is_empty(hdr)) {
		mutex_unlock(&dev->lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = wait_event_interruptible(dev->readout_wq,
			!ring_is_empty(hdr));
		if (err)
			return err;
		mutex_lock(&dev->lock);
	}

	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
			sizeof(*entry))) {
			mutex_unlock(&dev->lock);
			return -EFAULT;
		}

		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	*off = hdr->read_idx;
	mutex_unlock(&dev->lock);

	return copied;
}
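
/*
 * Hypothetical userspace consumer of the read() interface above (the
 * device node path is illustrative; it is not defined in this file):
 *
 *	struct nvgpu_ctxsw_trace_entry ents[32];
 *	size_t i;
 *	int fd = open("/dev/nvhost-ctxsw-gpu", O_RDONLY);
 *	ssize_t n = read(fd, ents, sizeof(ents));
 *	for (i = 0; i < n / sizeof(ents[0]); i++)
 *		handle_entry(&ents[i]);
 *
 * read() blocks until at least one entry is pending (unless O_NONBLOCK
 * is set) and always returns a whole number of entries.
 */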

static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
	dev->write_enabled = true;
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
{
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
	dev->write_enabled = false;
	return 0;
}

static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev,
	size_t size)
{
	struct nvgpu_ctxsw_ring_header *hdr;

	/* the ring cannot be replaced while mapped or while tracing */
	if (dev->write_enabled || atomic_read(&dev->vma_ref))
		return -EBUSY;

	size = roundup(size, PAGE_SIZE);
	hdr = vmalloc_user(size);
	if (!hdr)
		return -ENOMEM;

	if (dev->hdr)
		vfree(dev->hdr);

	dev->hdr = hdr;
	dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
	dev->size = size;

	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
	hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header))
		/ sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->drop_count = 0;
	hdr->read_idx = 0;
	hdr->write_idx = 0;
	hdr->write_seqno = 0;

	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
		dev->size, dev->hdr, dev->ents, hdr->num_ents);
	return 0;
}
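
/*
 * Resulting buffer layout (a single vmalloc_user() area, later mapped
 * into userspace by gk20a_ctxsw_dev_mmap()):
 *
 *	+--------------------------------+  <- dev->hdr
 *	| struct nvgpu_ctxsw_ring_header |
 *	+--------------------------------+  <- dev->ents
 *	| entry 0                        |
 *	| ...                            |
 *	| entry num_ents-1               |
 *	+--------------------------------+  <- dev->hdr + size
 *
 * vmalloc_user() zeroes the area and makes it suitable for mapping to
 * userspace, which is what makes the remap_vmalloc_range() call below
 * legal.
 */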

static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_ring_setup_args *args)
{
	size_t size = args->size;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);

	if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
		return -EINVAL;

	return gk20a_ctxsw_dev_ring_alloc(dev, size);
}

static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	dev->filter = args->filter;
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
	struct nvgpu_ctxsw_trace_filter_args *args)
{
	args->filter = dev->filter;
	return 0;
}

static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
{
	struct gk20a *g = dev->g;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	/* flush is an optional HAL op; only call it if the chip has one */
	if (g->ops.fecs_trace.flush)
		err = g->ops.fecs_trace.flush(g);

	if (likely(!err))
		err = g->ops.fecs_trace.poll(g);

	gk20a_idle(g->dev);
	return err;
}

int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
	struct gk20a *g;
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int err;
	size_t size;
	u32 n;

	/* only one VM for now */
	const int vmid = 0;

	g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	trace = g->ctxsw_trace;
	if (!trace) {
		err = -ENODEV;
		goto idle;
	}

	/* Allow only one user for this device */
	dev = &trace->devs[vmid];
	mutex_lock(&dev->lock);
	if (dev->hdr) {
		err = -EBUSY;
		goto done;
	}

	/* By default, allocate a ring buffer big enough to accommodate
	 * FECS records generated with the default event filter */

	/* enable all traces by default */
	NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);

	/* compute max number of entries generated with this filter */
	n = g->ops.fecs_trace.max_entries(g, &dev->filter);

	size = sizeof(struct nvgpu_ctxsw_ring_header) +
			n * sizeof(struct nvgpu_ctxsw_trace_entry);
	gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
		size, n, sizeof(struct nvgpu_ctxsw_trace_entry));

	err = gk20a_ctxsw_dev_ring_alloc(dev, size);
	if (err)
		goto done;

	filp->private_data = dev;
	gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
		filp, dev, size);

	err = g->ops.fecs_trace.enable(g);

done:
	mutex_unlock(&dev->lock);

idle:
	gk20a_idle(g->dev);

	return err;
}

int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);

	mutex_lock(&dev->lock);
	dev->write_enabled = false;
	if (dev->hdr) {
		vfree(dev->hdr);
		dev->hdr = NULL;
	}

	g->ops.fecs_trace.disable(g);

	mutex_unlock(&dev->lock);

	return 0;
}

long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
	unsigned long arg)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
		|| (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST))
		return -EINVAL;

	BUG_ON(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE);

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	mutex_lock(&dev->lock);

	switch (cmd) {
	case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
		err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
		break;
	case NVGPU_CTXSW_IOCTL_RING_SETUP:
		err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
			(struct nvgpu_ctxsw_ring_setup_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_SET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_GET_FILTER:
		err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
			(struct nvgpu_ctxsw_trace_filter_args *) buf);
		break;
	case NVGPU_CTXSW_IOCTL_POLL:
		/* poll may feed entries back into this ring and retake the
		 * lock, so release it here */
		mutex_unlock(&dev->lock);
		err = gk20a_ctxsw_dev_ioctl_poll(dev);
		mutex_lock(&dev->lock);
		break;
	default:
		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
			cmd);
		err = -ENOTTY;
	}

	mutex_unlock(&dev->lock);

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
		/* copy_to_user() returns bytes not copied, not an errno */
		if (copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}

	return err;
}
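
/*
 * Hypothetical userspace use of the ioctl interface above, setting up a
 * 64 KiB ring and enabling tracing (the device path is illustrative):
 *
 *	struct nvgpu_ctxsw_ring_setup_args setup = { .size = 64 * 1024 };
 *	int fd = open("/dev/nvhost-ctxsw-gpu", O_RDWR);
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_RING_SETUP, &setup);
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE);
 *	...
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_POLL);  // force a FECS flush/poll
 */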

unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	unsigned int mask = 0;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");

	mutex_lock(&dev->lock);
	poll_wait(filp, &dev->readout_wq, wait);
	if (!ring_is_empty(hdr))
		mask |= POLLIN | POLLRDNORM;
	mutex_unlock(&dev->lock);

	return mask;
}

static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;

	atomic_inc(&dev->vma_ref);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		atomic_read(&dev->vma_ref));
}

static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;

	atomic_dec(&dev->vma_ref);
	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		atomic_read(&dev->vma_ref));
}

static const struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	.open = gk20a_ctxsw_dev_vma_open,
	.close = gk20a_ctxsw_dev_vma_close,
};

int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	int ret;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
		vma->vm_start, vma->vm_end);

	ret = remap_vmalloc_range(vma, dev->hdr, 0);
	if (likely(!ret)) {
		vma->vm_private_data = dev;
		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
		/* count the initial mapping; mm only calls ->open when a
		 * vma is duplicated, e.g. on fork */
		vma->vm_ops->open(vma);
	}

	return ret;
}
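
/*
 * Hypothetical userspace consumer of the mmap() interface above.  The
 * shared header and entry array can be read directly; only read_idx is
 * written back by the consumer:
 *
 *	struct nvgpu_ctxsw_ring_header *hdr =
 *		mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
 *		     MAP_SHARED, fd, 0);
 *	struct nvgpu_ctxsw_trace_entry *ents = (void *)(hdr + 1);
 *
 *	while (hdr->read_idx != hdr->write_idx) {
 *		handle_entry(&ents[hdr->read_idx]);
 *		hdr->read_idx = (hdr->read_idx + 1) % hdr->num_ents;
 *	}
 *
 * A real consumer would pair each read of write_idx with a read barrier,
 * mirroring the smp_wmb() in gk20a_ctxsw_trace_write() below.
 */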

#ifdef CONFIG_GK20A_CTXSW_TRACE
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	struct gk20a_ctxsw_dev *dev = trace->devs;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		init_waitqueue_head(&dev->readout_wq);
		mutex_init(&dev->lock);
		atomic_set(&dev->vma_ref, 0);
		dev++;
	}
	return 0;
}
#endif

int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	if (likely(trace))
		return 0;

	trace = kzalloc(sizeof(*trace), GFP_KERNEL);
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	kfree(trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}

void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	/* quiesce the hw tracer before freeing the sw state it feeds */
	g->ops.fecs_trace.deinit(g);

	kfree(g->ctxsw_trace);
	g->ctxsw_trace = NULL;
#endif
}

int gk20a_ctxsw_trace_write(struct gk20a *g,
		struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	mutex_lock(&dev->lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	/* a sequence number is consumed even if the entry is dropped or
	 * filtered, so gaps in seqno flag lost entries to the consumer */
	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	gk20a_dbg(gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[hdr->write_idx] = *entry;

	/* ensure record is written before updating write index */
	smp_wmb();

	hdr->write_idx++;
	if (unlikely(hdr->write_idx >= hdr->num_ents))
		hdr->write_idx = 0;
	gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	mutex_unlock(&dev->lock);
	return ret;

drop:
	hdr->drop_count++;

filter:
	gk20a_dbg(gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	mutex_unlock(&dev->lock);
	return ret;
}
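
/*
 * Consumers can detect lost entries in two ways: hdr->drop_count counts
 * entries rejected above (ring full or writes disabled), and since a
 * write_seqno is consumed even for rejected entries, any gap in the
 * seqno field of delivered entries marks where entries were dropped or
 * filtered out.
 */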

void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
{
	struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[vmid];

	wake_up_interruptible(&dev->readout_wq);
}