diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 727 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 27 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 14 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5 |
7 files changed, 48 insertions, 747 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 0d011b06..546f4164 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -553,8 +553,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) | |||
553 | gk20a_dbg_info("freeing bound channel context, timeout=%ld", | 553 | gk20a_dbg_info("freeing bound channel context, timeout=%ld", |
554 | timeout); | 554 | timeout); |
555 | 555 | ||
556 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
556 | if (g->ops.fecs_trace.unbind_channel && !ch->vpr) | 557 | if (g->ops.fecs_trace.unbind_channel && !ch->vpr) |
557 | g->ops.fecs_trace.unbind_channel(g, ch); | 558 | g->ops.fecs_trace.unbind_channel(g, ch); |
559 | #endif | ||
558 | 560 | ||
559 | /* release channel ctx */ | 561 | /* release channel ctx */ |
560 | g->ops.gr.free_channel_ctx(ch, was_tsg); | 562 | g->ops.gr.free_channel_ctx(ch, was_tsg); |
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c deleted file mode 100644 index fb33de23..00000000 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ /dev/null | |||
@@ -1,727 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <asm/barrier.h> | ||
24 | #include <linux/wait.h> | ||
25 | #include <linux/ktime.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | #include <linux/poll.h> | ||
28 | #include <trace/events/gk20a.h> | ||
29 | #include <uapi/linux/nvgpu.h> | ||
30 | |||
31 | #include <nvgpu/kmem.h> | ||
32 | |||
33 | #include "ctxsw_trace_gk20a.h" | ||
34 | #include "gk20a.h" | ||
35 | #include "platform_gk20a.h" | ||
36 | #include "gr_gk20a.h" | ||
37 | #include "common/linux/os_linux.h" | ||
38 | |||
39 | #include <nvgpu/log.h> | ||
40 | #include <nvgpu/atomic.h> | ||
41 | #include <nvgpu/barrier.h> | ||
42 | |||
43 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
44 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
45 | |||
46 | #define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) | ||
47 | |||
48 | /* Userland-facing FIFO (one global + eventually one per VM) */ | ||
49 | struct gk20a_ctxsw_dev { | ||
50 | struct gk20a *g; | ||
51 | |||
52 | struct nvgpu_ctxsw_ring_header *hdr; | ||
53 | struct nvgpu_ctxsw_trace_entry *ents; | ||
54 | struct nvgpu_ctxsw_trace_filter filter; | ||
55 | bool write_enabled; | ||
56 | struct nvgpu_cond readout_wq; | ||
57 | size_t size; | ||
58 | u32 num_ents; | ||
59 | |||
60 | nvgpu_atomic_t vma_ref; | ||
61 | |||
62 | struct nvgpu_mutex write_lock; | ||
63 | }; | ||
64 | |||
65 | |||
66 | struct gk20a_ctxsw_trace { | ||
67 | struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; | ||
68 | }; | ||
69 | |||
70 | static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) | ||
71 | { | ||
72 | return (hdr->write_idx == hdr->read_idx); | ||
73 | } | ||
74 | |||
75 | static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) | ||
76 | { | ||
77 | return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; | ||
78 | } | ||
79 | |||
80 | static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) | ||
81 | { | ||
82 | return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; | ||
83 | } | ||
84 | |||
85 | ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, | ||
86 | loff_t *off) | ||
87 | { | ||
88 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
89 | struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; | ||
90 | struct nvgpu_ctxsw_trace_entry __user *entry = | ||
91 | (struct nvgpu_ctxsw_trace_entry *) buf; | ||
92 | size_t copied = 0; | ||
93 | int err; | ||
94 | |||
95 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | ||
96 | "filp=%p buf=%p size=%zu", filp, buf, size); | ||
97 | |||
98 | nvgpu_mutex_acquire(&dev->write_lock); | ||
99 | while (ring_is_empty(hdr)) { | ||
100 | nvgpu_mutex_release(&dev->write_lock); | ||
101 | if (filp->f_flags & O_NONBLOCK) | ||
102 | return -EAGAIN; | ||
103 | err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, | ||
104 | !ring_is_empty(hdr), 0); | ||
105 | if (err) | ||
106 | return err; | ||
107 | nvgpu_mutex_acquire(&dev->write_lock); | ||
108 | } | ||
109 | |||
110 | while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { | ||
111 | if (ring_is_empty(hdr)) | ||
112 | break; | ||
113 | |||
114 | if (copy_to_user(entry, &dev->ents[hdr->read_idx], | ||
115 | sizeof(*entry))) { | ||
116 | nvgpu_mutex_release(&dev->write_lock); | ||
117 | return -EFAULT; | ||
118 | } | ||
119 | |||
120 | hdr->read_idx++; | ||
121 | if (hdr->read_idx >= hdr->num_ents) | ||
122 | hdr->read_idx = 0; | ||
123 | |||
124 | entry++; | ||
125 | copied += sizeof(*entry); | ||
126 | size -= sizeof(*entry); | ||
127 | } | ||
128 | |||
129 | gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied, | ||
130 | hdr->read_idx); | ||
131 | |||
132 | *off = hdr->read_idx; | ||
133 | nvgpu_mutex_release(&dev->write_lock); | ||
134 | |||
135 | return copied; | ||
136 | } | ||
137 | |||
138 | static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) | ||
139 | { | ||
140 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); | ||
141 | nvgpu_mutex_acquire(&dev->write_lock); | ||
142 | dev->write_enabled = true; | ||
143 | nvgpu_mutex_release(&dev->write_lock); | ||
144 | dev->g->ops.fecs_trace.enable(dev->g); | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) | ||
149 | { | ||
150 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); | ||
151 | dev->g->ops.fecs_trace.disable(dev->g); | ||
152 | nvgpu_mutex_acquire(&dev->write_lock); | ||
153 | dev->write_enabled = false; | ||
154 | nvgpu_mutex_release(&dev->write_lock); | ||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, | ||
159 | size_t size) | ||
160 | { | ||
161 | struct gk20a *g = dev->g; | ||
162 | void *buf; | ||
163 | int err; | ||
164 | |||
165 | if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref))) | ||
166 | return -EBUSY; | ||
167 | |||
168 | err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); | ||
169 | if (err) | ||
170 | return err; | ||
171 | |||
172 | |||
173 | dev->hdr = buf; | ||
174 | dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); | ||
175 | dev->size = size; | ||
176 | dev->num_ents = dev->hdr->num_ents; | ||
177 | |||
178 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", | ||
179 | dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); | ||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, | ||
184 | void **buf, size_t *size) | ||
185 | { | ||
186 | struct nvgpu_ctxsw_ring_header *hdr; | ||
187 | |||
188 | *size = roundup(*size, PAGE_SIZE); | ||
189 | hdr = vmalloc_user(*size); | ||
190 | if (!hdr) | ||
191 | return -ENOMEM; | ||
192 | |||
193 | hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; | ||
194 | hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; | ||
195 | hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) | ||
196 | / sizeof(struct nvgpu_ctxsw_trace_entry); | ||
197 | hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); | ||
198 | hdr->drop_count = 0; | ||
199 | hdr->read_idx = 0; | ||
200 | hdr->write_idx = 0; | ||
201 | hdr->write_seqno = 0; | ||
202 | |||
203 | *buf = hdr; | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | int gk20a_ctxsw_dev_ring_free(struct gk20a *g) | ||
208 | { | ||
209 | struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; | ||
210 | |||
211 | nvgpu_vfree(g, dev->hdr); | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, | ||
216 | struct nvgpu_ctxsw_ring_setup_args *args) | ||
217 | { | ||
218 | size_t size = args->size; | ||
219 | int ret; | ||
220 | |||
221 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); | ||
222 | |||
223 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) | ||
224 | return -EINVAL; | ||
225 | |||
226 | nvgpu_mutex_acquire(&dev->write_lock); | ||
227 | ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); | ||
228 | nvgpu_mutex_release(&dev->write_lock); | ||
229 | |||
230 | return ret; | ||
231 | } | ||
232 | |||
233 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | ||
234 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
235 | { | ||
236 | struct gk20a *g = dev->g; | ||
237 | |||
238 | nvgpu_mutex_acquire(&dev->write_lock); | ||
239 | dev->filter = args->filter; | ||
240 | nvgpu_mutex_release(&dev->write_lock); | ||
241 | |||
242 | if (g->ops.fecs_trace.set_filter) | ||
243 | g->ops.fecs_trace.set_filter(g, &dev->filter); | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, | ||
248 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
249 | { | ||
250 | nvgpu_mutex_acquire(&dev->write_lock); | ||
251 | args->filter = dev->filter; | ||
252 | nvgpu_mutex_release(&dev->write_lock); | ||
253 | |||
254 | return 0; | ||
255 | } | ||
256 | |||
257 | static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) | ||
258 | { | ||
259 | struct gk20a *g = dev->g; | ||
260 | int err; | ||
261 | |||
262 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); | ||
263 | |||
264 | err = gk20a_busy(g); | ||
265 | if (err) | ||
266 | return err; | ||
267 | |||
268 | if (g->ops.fecs_trace.flush) | ||
269 | err = g->ops.fecs_trace.flush(g); | ||
270 | |||
271 | if (likely(!err)) | ||
272 | err = g->ops.fecs_trace.poll(g); | ||
273 | |||
274 | gk20a_idle(g); | ||
275 | return err; | ||
276 | } | ||
277 | |||
278 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) | ||
279 | { | ||
280 | struct nvgpu_os_linux *l; | ||
281 | struct gk20a *g; | ||
282 | struct gk20a_ctxsw_trace *trace; | ||
283 | struct gk20a_ctxsw_dev *dev; | ||
284 | int err; | ||
285 | size_t size; | ||
286 | u32 n; | ||
287 | |||
288 | /* only one VM for now */ | ||
289 | const int vmid = 0; | ||
290 | |||
291 | l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev); | ||
292 | g = gk20a_get(&l->g); | ||
293 | if (!g) | ||
294 | return -ENODEV; | ||
295 | |||
296 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); | ||
297 | |||
298 | if (!capable(CAP_SYS_ADMIN)) { | ||
299 | err = -EPERM; | ||
300 | goto free_ref; | ||
301 | } | ||
302 | |||
303 | err = gk20a_busy(g); | ||
304 | if (err) | ||
305 | goto free_ref; | ||
306 | |||
307 | trace = g->ctxsw_trace; | ||
308 | if (!trace) { | ||
309 | err = -ENODEV; | ||
310 | goto idle; | ||
311 | } | ||
312 | |||
313 | /* Allow only one user for this device */ | ||
314 | dev = &trace->devs[vmid]; | ||
315 | nvgpu_mutex_acquire(&dev->write_lock); | ||
316 | if (dev->hdr) { | ||
317 | err = -EBUSY; | ||
318 | goto done; | ||
319 | } | ||
320 | |||
321 | /* By default, allocate ring buffer big enough to accommodate | ||
322 | * FECS records with default event filter */ | ||
323 | |||
324 | /* enable all traces by default */ | ||
325 | NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); | ||
326 | |||
327 | /* compute max number of entries generated with this filter */ | ||
328 | n = g->ops.fecs_trace.max_entries(g, &dev->filter); | ||
329 | |||
330 | size = sizeof(struct nvgpu_ctxsw_ring_header) + | ||
331 | n * sizeof(struct nvgpu_ctxsw_trace_entry); | ||
332 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", | ||
333 | size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); | ||
334 | |||
335 | err = gk20a_ctxsw_dev_alloc_buffer(dev, size); | ||
336 | if (!err) { | ||
337 | filp->private_data = dev; | ||
338 | gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", | ||
339 | filp, dev, size); | ||
340 | } | ||
341 | |||
342 | done: | ||
343 | nvgpu_mutex_release(&dev->write_lock); | ||
344 | |||
345 | idle: | ||
346 | gk20a_idle(g); | ||
347 | free_ref: | ||
348 | if (err) | ||
349 | gk20a_put(g); | ||
350 | return err; | ||
351 | } | ||
352 | |||
353 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) | ||
354 | { | ||
355 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
356 | struct gk20a *g = dev->g; | ||
357 | |||
358 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); | ||
359 | |||
360 | g->ops.fecs_trace.disable(g); | ||
361 | |||
362 | nvgpu_mutex_acquire(&dev->write_lock); | ||
363 | dev->write_enabled = false; | ||
364 | nvgpu_mutex_release(&dev->write_lock); | ||
365 | |||
366 | if (dev->hdr) { | ||
367 | dev->g->ops.fecs_trace.free_user_buffer(dev->g); | ||
368 | dev->hdr = NULL; | ||
369 | } | ||
370 | gk20a_put(g); | ||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, | ||
375 | unsigned long arg) | ||
376 | { | ||
377 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
378 | struct gk20a *g = dev->g; | ||
379 | u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; | ||
380 | int err = 0; | ||
381 | |||
382 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); | ||
383 | |||
384 | if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || | ||
385 | (_IOC_NR(cmd) == 0) || | ||
386 | (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) || | ||
387 | (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE)) | ||
388 | return -EINVAL; | ||
389 | |||
390 | memset(buf, 0, sizeof(buf)); | ||
391 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
392 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
393 | return -EFAULT; | ||
394 | } | ||
395 | |||
396 | switch (cmd) { | ||
397 | case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: | ||
398 | err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); | ||
399 | break; | ||
400 | case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: | ||
401 | err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); | ||
402 | break; | ||
403 | case NVGPU_CTXSW_IOCTL_RING_SETUP: | ||
404 | err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, | ||
405 | (struct nvgpu_ctxsw_ring_setup_args *) buf); | ||
406 | break; | ||
407 | case NVGPU_CTXSW_IOCTL_SET_FILTER: | ||
408 | err = gk20a_ctxsw_dev_ioctl_set_filter(dev, | ||
409 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
410 | break; | ||
411 | case NVGPU_CTXSW_IOCTL_GET_FILTER: | ||
412 | err = gk20a_ctxsw_dev_ioctl_get_filter(dev, | ||
413 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
414 | break; | ||
415 | case NVGPU_CTXSW_IOCTL_POLL: | ||
416 | err = gk20a_ctxsw_dev_ioctl_poll(dev); | ||
417 | break; | ||
418 | default: | ||
419 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", | ||
420 | cmd); | ||
421 | err = -ENOTTY; | ||
422 | } | ||
423 | |||
424 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
425 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); | ||
426 | |||
427 | return err; | ||
428 | } | ||
429 | |||
430 | unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) | ||
431 | { | ||
432 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
433 | struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; | ||
434 | unsigned int mask = 0; | ||
435 | |||
436 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); | ||
437 | |||
438 | nvgpu_mutex_acquire(&dev->write_lock); | ||
439 | poll_wait(filp, &dev->readout_wq.wq, wait); | ||
440 | if (!ring_is_empty(hdr)) | ||
441 | mask |= POLLIN | POLLRDNORM; | ||
442 | nvgpu_mutex_release(&dev->write_lock); | ||
443 | |||
444 | return mask; | ||
445 | } | ||
446 | |||
447 | static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) | ||
448 | { | ||
449 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
450 | |||
451 | nvgpu_atomic_inc(&dev->vma_ref); | ||
452 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
453 | nvgpu_atomic_read(&dev->vma_ref)); | ||
454 | } | ||
455 | |||
456 | static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) | ||
457 | { | ||
458 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
459 | |||
460 | nvgpu_atomic_dec(&dev->vma_ref); | ||
461 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
462 | nvgpu_atomic_read(&dev->vma_ref)); | ||
463 | } | ||
464 | |||
465 | static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { | ||
466 | .open = gk20a_ctxsw_dev_vma_open, | ||
467 | .close = gk20a_ctxsw_dev_vma_close, | ||
468 | }; | ||
469 | |||
470 | int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, | ||
471 | struct vm_area_struct *vma) | ||
472 | { | ||
473 | return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); | ||
474 | } | ||
475 | |||
476 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||
477 | { | ||
478 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
479 | int ret; | ||
480 | |||
481 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", | ||
482 | vma->vm_start, vma->vm_end); | ||
483 | |||
484 | ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); | ||
485 | if (likely(!ret)) { | ||
486 | vma->vm_private_data = dev; | ||
487 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; | ||
488 | vma->vm_ops->open(vma); | ||
489 | } | ||
490 | |||
491 | return ret; | ||
492 | } | ||
493 | |||
494 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
495 | static int gk20a_ctxsw_init_devs(struct gk20a *g) | ||
496 | { | ||
497 | struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; | ||
498 | struct gk20a_ctxsw_dev *dev = trace->devs; | ||
499 | int err; | ||
500 | int i; | ||
501 | |||
502 | for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { | ||
503 | dev->g = g; | ||
504 | dev->hdr = NULL; | ||
505 | dev->write_enabled = false; | ||
506 | nvgpu_cond_init(&dev->readout_wq); | ||
507 | err = nvgpu_mutex_init(&dev->write_lock); | ||
508 | if (err) | ||
509 | return err; | ||
510 | nvgpu_atomic_set(&dev->vma_ref, 0); | ||
511 | dev++; | ||
512 | } | ||
513 | return 0; | ||
514 | } | ||
515 | #endif | ||
516 | |||
517 | int gk20a_ctxsw_trace_init(struct gk20a *g) | ||
518 | { | ||
519 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
520 | struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; | ||
521 | int err; | ||
522 | |||
523 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace); | ||
524 | |||
525 | /* if tracing is not supported, skip this */ | ||
526 | if (!g->ops.fecs_trace.init) | ||
527 | return 0; | ||
528 | |||
529 | if (likely(trace)) | ||
530 | return 0; | ||
531 | |||
532 | trace = nvgpu_kzalloc(g, sizeof(*trace)); | ||
533 | if (unlikely(!trace)) | ||
534 | return -ENOMEM; | ||
535 | g->ctxsw_trace = trace; | ||
536 | |||
537 | err = gk20a_ctxsw_init_devs(g); | ||
538 | if (err) | ||
539 | goto fail; | ||
540 | |||
541 | err = g->ops.fecs_trace.init(g); | ||
542 | if (unlikely(err)) | ||
543 | goto fail; | ||
544 | |||
545 | return 0; | ||
546 | |||
547 | fail: | ||
548 | memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); | ||
549 | nvgpu_kfree(g, trace); | ||
550 | g->ctxsw_trace = NULL; | ||
551 | return err; | ||
552 | #else | ||
553 | return 0; | ||
554 | #endif | ||
555 | } | ||
556 | |||
557 | void gk20a_ctxsw_trace_cleanup(struct gk20a *g) | ||
558 | { | ||
559 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
560 | struct gk20a_ctxsw_trace *trace; | ||
561 | struct gk20a_ctxsw_dev *dev; | ||
562 | int i; | ||
563 | |||
564 | if (!g->ctxsw_trace) | ||
565 | return; | ||
566 | |||
567 | trace = g->ctxsw_trace; | ||
568 | dev = trace->devs; | ||
569 | |||
570 | for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { | ||
571 | nvgpu_mutex_destroy(&dev->write_lock); | ||
572 | dev++; | ||
573 | } | ||
574 | |||
575 | nvgpu_kfree(g, g->ctxsw_trace); | ||
576 | g->ctxsw_trace = NULL; | ||
577 | |||
578 | g->ops.fecs_trace.deinit(g); | ||
579 | #endif | ||
580 | } | ||
581 | |||
582 | int gk20a_ctxsw_trace_write(struct gk20a *g, | ||
583 | struct nvgpu_ctxsw_trace_entry *entry) | ||
584 | { | ||
585 | struct nvgpu_ctxsw_ring_header *hdr; | ||
586 | struct gk20a_ctxsw_dev *dev; | ||
587 | int ret = 0; | ||
588 | const char *reason; | ||
589 | u32 write_idx; | ||
590 | |||
591 | if (!g->ctxsw_trace) | ||
592 | return 0; | ||
593 | |||
594 | if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS)) | ||
595 | return -ENODEV; | ||
596 | |||
597 | dev = &g->ctxsw_trace->devs[entry->vmid]; | ||
598 | hdr = dev->hdr; | ||
599 | |||
600 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, | ||
601 | "dev=%p hdr=%p", dev, hdr); | ||
602 | |||
603 | nvgpu_mutex_acquire(&dev->write_lock); | ||
604 | |||
605 | if (unlikely(!hdr)) { | ||
606 | /* device has been released */ | ||
607 | ret = -ENODEV; | ||
608 | goto done; | ||
609 | } | ||
610 | |||
611 | write_idx = hdr->write_idx; | ||
612 | if (write_idx >= dev->num_ents) { | ||
613 | nvgpu_err(dev->g, | ||
614 | "write_idx=%u out of range [0..%u]", | ||
615 | write_idx, dev->num_ents); | ||
616 | ret = -ENOSPC; | ||
617 | reason = "write_idx out of range"; | ||
618 | goto disable; | ||
619 | } | ||
620 | |||
621 | entry->seqno = hdr->write_seqno++; | ||
622 | |||
623 | if (!dev->write_enabled) { | ||
624 | ret = -EBUSY; | ||
625 | reason = "write disabled"; | ||
626 | goto drop; | ||
627 | } | ||
628 | |||
629 | if (unlikely(ring_is_full(hdr))) { | ||
630 | ret = -ENOSPC; | ||
631 | reason = "user fifo full"; | ||
632 | goto drop; | ||
633 | } | ||
634 | |||
635 | if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) { | ||
636 | reason = "filtered out"; | ||
637 | goto filter; | ||
638 | } | ||
639 | |||
640 | gk20a_dbg(gpu_dbg_ctxsw, | ||
641 | "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx", | ||
642 | entry->seqno, entry->context_id, entry->pid, | ||
643 | entry->tag, entry->timestamp); | ||
644 | |||
645 | dev->ents[write_idx] = *entry; | ||
646 | |||
647 | /* ensure record is written before updating write index */ | ||
648 | nvgpu_smp_wmb(); | ||
649 | |||
650 | write_idx++; | ||
651 | if (unlikely(write_idx >= hdr->num_ents)) | ||
652 | write_idx = 0; | ||
653 | hdr->write_idx = write_idx; | ||
654 | gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", | ||
655 | hdr->read_idx, hdr->write_idx, ring_len(hdr)); | ||
656 | |||
657 | nvgpu_mutex_release(&dev->write_lock); | ||
658 | return ret; | ||
659 | |||
660 | disable: | ||
661 | g->ops.fecs_trace.disable(g); | ||
662 | |||
663 | drop: | ||
664 | hdr->drop_count++; | ||
665 | |||
666 | filter: | ||
667 | gk20a_dbg(gpu_dbg_ctxsw, | ||
668 | "dropping seqno=%d context_id=%08x pid=%lld " | ||
669 | "tag=%x time=%llx (%s)", | ||
670 | entry->seqno, entry->context_id, entry->pid, | ||
671 | entry->tag, entry->timestamp, reason); | ||
672 | |||
673 | done: | ||
674 | nvgpu_mutex_release(&dev->write_lock); | ||
675 | return ret; | ||
676 | } | ||
677 | |||
678 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) | ||
679 | { | ||
680 | struct gk20a_ctxsw_dev *dev; | ||
681 | |||
682 | if (!g->ctxsw_trace) | ||
683 | return; | ||
684 | |||
685 | dev = &g->ctxsw_trace->devs[vmid]; | ||
686 | nvgpu_cond_signal_interruptible(&dev->readout_wq); | ||
687 | } | ||
688 | |||
689 | void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) | ||
690 | { | ||
691 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
692 | struct nvgpu_ctxsw_trace_entry entry = { | ||
693 | .vmid = 0, | ||
694 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, | ||
695 | .context_id = 0, | ||
696 | .pid = ch->tgid, | ||
697 | }; | ||
698 | |||
699 | if (!g->ctxsw_trace) | ||
700 | return; | ||
701 | |||
702 | g->ops.bus.read_ptimer(g, &entry.timestamp); | ||
703 | gk20a_ctxsw_trace_write(g, &entry); | ||
704 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
705 | #endif | ||
706 | trace_gk20a_channel_reset(ch->chid, ch->tsgid); | ||
707 | } | ||
708 | |||
709 | void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) | ||
710 | { | ||
711 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
712 | struct nvgpu_ctxsw_trace_entry entry = { | ||
713 | .vmid = 0, | ||
714 | .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, | ||
715 | .context_id = 0, | ||
716 | .pid = tsg->tgid, | ||
717 | }; | ||
718 | |||
719 | if (!g->ctxsw_trace) | ||
720 | return; | ||
721 | |||
722 | g->ops.bus.read_ptimer(g, &entry.timestamp); | ||
723 | gk20a_ctxsw_trace_write(g, &entry); | ||
724 | gk20a_ctxsw_trace_wake_up(g, 0); | ||
725 | #endif | ||
726 | trace_gk20a_channel_reset(~0, tsg->tsgid); | ||
727 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h index b270581b..dddb8603 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h | |||
@@ -23,6 +23,8 @@ | |||
23 | #ifndef __CTXSW_TRACE_GK20A_H | 23 | #ifndef __CTXSW_TRACE_GK20A_H |
24 | #define __CTXSW_TRACE_GK20A_H | 24 | #define __CTXSW_TRACE_GK20A_H |
25 | 25 | ||
26 | #include <nvgpu/types.h> | ||
27 | |||
26 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 | 28 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 |
27 | 29 | ||
28 | struct file; | 30 | struct file; |
@@ -41,20 +43,22 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); | |||
41 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); | 43 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); |
42 | long gk20a_ctxsw_dev_ioctl(struct file *filp, | 44 | long gk20a_ctxsw_dev_ioctl(struct file *filp, |
43 | unsigned int cmd, unsigned long arg); | 45 | unsigned int cmd, unsigned long arg); |
44 | ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *); | 46 | ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, |
45 | unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *); | 47 | size_t size, loff_t *offs); |
46 | int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *); | 48 | unsigned int gk20a_ctxsw_dev_poll(struct file *filp, |
49 | struct poll_table_struct *pts); | ||
50 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma); | ||
47 | int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size); | 51 | int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size); |
48 | int gk20a_ctxsw_dev_ring_free(struct gk20a *g); | 52 | int gk20a_ctxsw_dev_ring_free(struct gk20a *g); |
49 | int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma); | 53 | int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma); |
50 | 54 | ||
51 | int gk20a_ctxsw_trace_init(struct gk20a *); | 55 | int gk20a_ctxsw_trace_init(struct gk20a *g); |
52 | void gk20a_ctxsw_trace_cleanup(struct gk20a *); | 56 | void gk20a_ctxsw_trace_cleanup(struct gk20a *g); |
53 | int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); | 57 | int gk20a_ctxsw_trace_write(struct gk20a *g, |
58 | struct nvgpu_ctxsw_trace_entry *entry); | ||
54 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); | 59 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); |
55 | 60 | ||
56 | void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch); | 61 | void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch); |
57 | void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg); | 62 | void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg); |
58 | 63 | ||
59 | |||
60 | #endif /* __CTXSW_TRACE_GK20A_H */ | 64 | #endif /* __CTXSW_TRACE_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index d10af9e9..17ae626b 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -1228,16 +1228,24 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) | |||
1228 | if (nvgpu_pmu_disable_elpg(g)) | 1228 | if (nvgpu_pmu_disable_elpg(g)) |
1229 | nvgpu_err(g, "failed to set disable elpg"); | 1229 | nvgpu_err(g, "failed to set disable elpg"); |
1230 | } | 1230 | } |
1231 | /* resetting engine will alter read/write index. | 1231 | |
1232 | * need to flush circular buffer before re-enabling FECS. | 1232 | #ifdef CONFIG_GK20A_CTXSW_TRACE |
1233 | /* | ||
1234 | * Resetting engine will alter read/write index. Need to flush | ||
1235 | * circular buffer before re-enabling FECS. | ||
1233 | */ | 1236 | */ |
1234 | if (g->ops.fecs_trace.reset) | 1237 | if (g->ops.fecs_trace.reset) |
1235 | g->ops.fecs_trace.reset(g); | 1238 | g->ops.fecs_trace.reset(g); |
1236 | /*HALT_PIPELINE method, halt GR engine*/ | 1239 | #endif |
1240 | |||
1241 | /* HALT_PIPELINE method, halt GR engine. */ | ||
1237 | if (gr_gk20a_halt_pipe(g)) | 1242 | if (gr_gk20a_halt_pipe(g)) |
1238 | nvgpu_err(g, "failed to HALT gr pipe"); | 1243 | nvgpu_err(g, "failed to HALT gr pipe"); |
1239 | /* resetting engine using mc_enable_r() is not | 1244 | |
1240 | enough, we do full init sequence */ | 1245 | /* |
1246 | * Resetting engine using mc_enable_r() is not enough; we must | ||
1247 | * do full init sequence. | ||
1248 | */ | ||
1241 | gk20a_gr_reset(g); | 1249 | gk20a_gr_reset(g); |
1242 | if (g->support_pmu && g->can_elpg) | 1250 | if (g->support_pmu && g->can_elpg) |
1243 | nvgpu_pmu_enable_elpg(g); | 1251 | nvgpu_pmu_enable_elpg(g); |
@@ -1618,6 +1626,8 @@ static bool gk20a_fifo_handle_mmu_fault( | |||
1618 | } | 1626 | } |
1619 | } | 1627 | } |
1620 | } | 1628 | } |
1629 | |||
1630 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
1621 | /* | 1631 | /* |
1622 | * For non fake mmu fault, both tsg and ch pointers | 1632 | * For non fake mmu fault, both tsg and ch pointers |
1623 | * could be valid. Check tsg first. | 1633 | * could be valid. Check tsg first. |
@@ -1626,10 +1636,11 @@ static bool gk20a_fifo_handle_mmu_fault( | |||
1626 | gk20a_ctxsw_trace_tsg_reset(g, tsg); | 1636 | gk20a_ctxsw_trace_tsg_reset(g, tsg); |
1627 | else if (ch) | 1637 | else if (ch) |
1628 | gk20a_ctxsw_trace_channel_reset(g, ch); | 1638 | gk20a_ctxsw_trace_channel_reset(g, ch); |
1639 | #endif | ||
1629 | 1640 | ||
1630 | /* disable the channel/TSG from hw and increment | 1641 | /* |
1631 | * syncpoints */ | 1642 | * Disable the channel/TSG from hw and increment syncpoints. |
1632 | 1643 | */ | |
1633 | if (tsg) { | 1644 | if (tsg) { |
1634 | if (!g->fifo.deferred_reset_pending) { | 1645 | if (!g->fifo.deferred_reset_pending) { |
1635 | if (!fake_fault) | 1646 | if (!fake_fault) |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 47f6c56c..703a7c0c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -295,9 +295,11 @@ int gk20a_finalize_poweron(struct gk20a *g) | |||
295 | goto done; | 295 | goto done; |
296 | } | 296 | } |
297 | 297 | ||
298 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
298 | err = gk20a_ctxsw_trace_init(g); | 299 | err = gk20a_ctxsw_trace_init(g); |
299 | if (err) | 300 | if (err) |
300 | nvgpu_warn(g, "could not initialize ctxsw tracing"); | 301 | nvgpu_warn(g, "could not initialize ctxsw tracing"); |
302 | #endif | ||
301 | 303 | ||
302 | err = gk20a_sched_ctrl_init(g); | 304 | err = gk20a_sched_ctrl_init(g); |
303 | if (err) { | 305 | if (err) { |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index d7fdffb0..a34f06b2 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -689,18 +689,25 @@ struct gpu_ops { | |||
689 | int (*get_netlist_name)(struct gk20a *g, int index, char *name); | 689 | int (*get_netlist_name)(struct gk20a *g, int index, char *name); |
690 | bool (*is_fw_defined)(void); | 690 | bool (*is_fw_defined)(void); |
691 | } gr_ctx; | 691 | } gr_ctx; |
692 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
693 | /* | ||
694 | * Currently only supported on Linux due to the extremely tight | ||
695 | * integration with Linux device driver structure (in particular | ||
696 | * mmap). | ||
697 | */ | ||
692 | struct { | 698 | struct { |
693 | int (*init)(struct gk20a *g); | 699 | int (*init)(struct gk20a *g); |
694 | int (*max_entries)(struct gk20a *, | 700 | int (*max_entries)(struct gk20a *, |
695 | struct nvgpu_ctxsw_trace_filter *); | 701 | struct nvgpu_ctxsw_trace_filter *filter); |
696 | int (*flush)(struct gk20a *g); | 702 | int (*flush)(struct gk20a *g); |
697 | int (*poll)(struct gk20a *g); | 703 | int (*poll)(struct gk20a *g); |
698 | int (*enable)(struct gk20a *g); | 704 | int (*enable)(struct gk20a *g); |
699 | int (*disable)(struct gk20a *g); | 705 | int (*disable)(struct gk20a *g); |
700 | bool (*is_enabled)(struct gk20a *g); | 706 | bool (*is_enabled)(struct gk20a *g); |
701 | int (*reset)(struct gk20a *g); | 707 | int (*reset)(struct gk20a *g); |
702 | int (*bind_channel)(struct gk20a *, struct channel_gk20a *); | 708 | int (*bind_channel)(struct gk20a *g, struct channel_gk20a *ch); |
703 | int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); | 709 | int (*unbind_channel)(struct gk20a *g, |
710 | struct channel_gk20a *ch); | ||
704 | int (*deinit)(struct gk20a *g); | 711 | int (*deinit)(struct gk20a *g); |
705 | int (*alloc_user_buffer)(struct gk20a *g, | 712 | int (*alloc_user_buffer)(struct gk20a *g, |
706 | void **buf, size_t *size); | 713 | void **buf, size_t *size); |
@@ -710,6 +717,7 @@ struct gpu_ops { | |||
710 | int (*set_filter)(struct gk20a *g, | 717 | int (*set_filter)(struct gk20a *g, |
711 | struct nvgpu_ctxsw_trace_filter *filter); | 718 | struct nvgpu_ctxsw_trace_filter *filter); |
712 | } fecs_trace; | 719 | } fecs_trace; |
720 | #endif | ||
713 | struct { | 721 | struct { |
714 | bool (*support_sparse)(struct gk20a *g); | 722 | bool (*support_sparse)(struct gk20a *g); |
715 | u64 (*gmmu_map)(struct vm_gk20a *vm, | 723 | u64 (*gmmu_map)(struct vm_gk20a *vm, |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 1ea59a9d..f78d862c 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -3070,13 +3070,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
3070 | "fail to load golden ctx image"); | 3070 | "fail to load golden ctx image"); |
3071 | goto out; | 3071 | goto out; |
3072 | } | 3072 | } |
3073 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
3073 | if (g->ops.fecs_trace.bind_channel && !c->vpr) { | 3074 | if (g->ops.fecs_trace.bind_channel && !c->vpr) { |
3074 | err = g->ops.fecs_trace.bind_channel(g, c); | 3075 | err = g->ops.fecs_trace.bind_channel(g, c); |
3075 | if (err) { | 3076 | if (err) |
3076 | nvgpu_warn(g, | 3077 | nvgpu_warn(g, |
3077 | "fail to bind channel for ctxsw trace"); | 3078 | "fail to bind channel for ctxsw trace"); |
3078 | } | ||
3079 | } | 3079 | } |
3080 | #endif | ||
3080 | c->first_init = true; | 3081 | c->first_init = true; |
3081 | } | 3082 | } |
3082 | 3083 | ||