Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/channel.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/channel.c | 660
1 file changed, 660 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
new file mode 100644
index 00000000..40b11b86
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -0,0 +1,660 @@
1/*
2 * Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/enabled.h>
18#include <nvgpu/debug.h>
19#include <nvgpu/ltc.h>
20
21/*
22 * This is required for nvgpu_vm_find_buf() which is used in the tracing
23 * code. Once we can get and access userspace buffers without requiring
24 * direct dma_buf usage this can be removed.
25 */
26#include <nvgpu/linux/vm.h>
27
28#include "gk20a/gk20a.h"
29
30#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
31
32#include <linux/uaccess.h>
33#include <linux/dma-buf.h>
34#include <trace/events/gk20a.h>
35#include <uapi/linux/nvgpu.h>
36
37u32 nvgpu_get_gpfifo_entry_size(void)
38{
39 return sizeof(struct nvgpu_gpfifo);
40}
41
42#ifdef CONFIG_DEBUG_FS
43static void trace_write_pushbuffer(struct channel_gk20a *c,
44 struct nvgpu_gpfifo *g)
45{
46 void *mem = NULL;
47 unsigned int words;
48 u64 offset;
49 struct dma_buf *dmabuf = NULL;
50
51 if (gk20a_debug_trace_cmdbuf) {
52 u64 gpu_va = (u64)g->entry0 |
53 (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
54 int err;
55
56 words = pbdma_gp_entry1_length_v(g->entry1);
57 err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
58 if (!err)
59 mem = dma_buf_vmap(dmabuf);
60 }
61
62 if (mem) {
63 u32 i;
64 /*
65 * Write in batches of 128 as there seems to be a limit
66 * of how much you can output to ftrace at once.
67 */
68 for (i = 0; i < words; i += 128U) {
69 trace_gk20a_push_cmdbuf(
70 c->g->name,
71 0,
72 min(words - i, 128U),
73 offset + i * sizeof(u32),
74 mem);
75 }
76 dma_buf_vunmap(dmabuf, mem);
77 }
78}
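/*
 * Illustration (hypothetical values): for a pushbuffer of words = 300, the
 * batching loop above emits three ftrace events of 128, 128 and 44 words at
 * byte offsets offset + 0, offset + 512 and offset + 1024 respectively.
 */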
79#endif
80
81static void trace_write_pushbuffer_range(struct channel_gk20a *c,
82 struct nvgpu_gpfifo *g,
83 struct nvgpu_gpfifo __user *user_gpfifo,
84 int offset,
85 int count)
86{
87#ifdef CONFIG_DEBUG_FS
88 u32 size;
89 int i;
90 struct nvgpu_gpfifo *gp;
91 bool gpfifo_allocated = false;
92
93 if (!gk20a_debug_trace_cmdbuf)
94 return;
95
96 if (!g && !user_gpfifo)
97 return;
98
99 if (!g) {
100 size = count * sizeof(struct nvgpu_gpfifo);
101 if (size) {
102 g = nvgpu_big_malloc(c->g, size);
103 if (!g)
104 return;
105
106 if (copy_from_user(g, user_gpfifo, size)) {
107 nvgpu_big_free(c->g, g);
108 return;
109 }
110 }
111 gpfifo_allocated = true;
112 }
113
114 gp = g + offset;
115 for (i = 0; i < count; i++, gp++)
116 trace_write_pushbuffer(c, gp);
117
118 if (gpfifo_allocated)
119 nvgpu_big_free(c->g, g);
120#endif
121}
122
123/*
124 * Handle the submit synchronization - pre-fences and post-fences.
125 */
126static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
127 struct nvgpu_fence *fence,
128 struct channel_gk20a_job *job,
129 struct priv_cmd_entry **wait_cmd,
130 struct priv_cmd_entry **incr_cmd,
131 struct gk20a_fence **pre_fence,
132 struct gk20a_fence **post_fence,
133 bool force_need_sync_fence,
134 bool register_irq,
135 u32 flags)
136{
137 struct gk20a *g = c->g;
138 bool need_sync_fence = false;
139 bool new_sync_created = false;
140 int wait_fence_fd = -1;
141 int err = 0;
142 bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
143 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
144
145 /*
146 * If user wants to always allocate sync_fence_fds then respect that;
147 * otherwise, allocate sync_fence_fd based on user flags.
148 */
149 if (force_need_sync_fence)
150 need_sync_fence = true;
151
152 if (g->aggressive_sync_destroy_thresh) {
153 nvgpu_mutex_acquire(&c->sync_lock);
154 if (!c->sync) {
155 c->sync = gk20a_channel_sync_create(c);
156 if (!c->sync) {
157 err = -ENOMEM;
158 nvgpu_mutex_release(&c->sync_lock);
159 goto fail;
160 }
161 new_sync_created = true;
162 }
163 nvgpu_atomic_inc(&c->sync->refcount);
164 nvgpu_mutex_release(&c->sync_lock);
165 }
166
167 if (g->ops.fifo.resetup_ramfc && new_sync_created) {
168 err = g->ops.fifo.resetup_ramfc(c);
169 if (err)
170 goto fail;
171 }
172
173 /*
174 * Optionally insert syncpt wait in the beginning of gpfifo submission
175 * when user requested and the wait hasn't expired. Validate that the id
176 * makes sense, elide if not. The only reason this isn't being
177 * unceremoniously killed is to keep running some tests which trigger
178 * this condition.
179 */
180 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
181 job->pre_fence = gk20a_alloc_fence(c);
182 if (!job->pre_fence) {
183 err = -ENOMEM;
184 goto fail;
185 }
186
187 if (!pre_alloc_enabled)
188 job->wait_cmd = nvgpu_kzalloc(g,
189 sizeof(struct priv_cmd_entry));
190
191 if (!job->wait_cmd) {
192 err = -ENOMEM;
193 goto clean_up_pre_fence;
194 }
195
196 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
197 wait_fence_fd = fence->id;
198 err = c->sync->wait_fd(c->sync, wait_fence_fd,
199 job->wait_cmd, job->pre_fence);
200 } else {
201 err = c->sync->wait_syncpt(c->sync, fence->id,
202 fence->value, job->wait_cmd,
203 job->pre_fence);
204 }
205
206 if (!err) {
207 if (job->wait_cmd->valid)
208 *wait_cmd = job->wait_cmd;
209 *pre_fence = job->pre_fence;
210 } else
211 goto clean_up_wait_cmd;
212 }
213
214 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
215 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
216 need_sync_fence = true;
217
218 /*
219 * Always generate an increment at the end of a GPFIFO submission. This
220 * is used to keep track of method completion for idle railgating. The
221 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
222 */
223 job->post_fence = gk20a_alloc_fence(c);
224 if (!job->post_fence) {
225 err = -ENOMEM;
226 goto clean_up_wait_cmd;
227 }
228 if (!pre_alloc_enabled)
229 job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));
230
231 if (!job->incr_cmd) {
232 err = -ENOMEM;
233 goto clean_up_post_fence;
234 }
235
236 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
237 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
238 job->post_fence, need_wfi, need_sync_fence,
239 register_irq);
240 else
241 err = c->sync->incr(c->sync, job->incr_cmd,
242 job->post_fence, need_sync_fence,
243 register_irq);
244 if (!err) {
245 *incr_cmd = job->incr_cmd;
246 *post_fence = job->post_fence;
247 } else
248 goto clean_up_incr_cmd;
249
250 return 0;
251
252clean_up_incr_cmd:
253 free_priv_cmdbuf(c, job->incr_cmd);
254 if (!pre_alloc_enabled)
255 job->incr_cmd = NULL;
256clean_up_post_fence:
257 gk20a_fence_put(job->post_fence);
258 job->post_fence = NULL;
259clean_up_wait_cmd:
260 free_priv_cmdbuf(c, job->wait_cmd);
261 if (!pre_alloc_enabled)
262 job->wait_cmd = NULL;
263clean_up_pre_fence:
264 gk20a_fence_put(job->pre_fence);
265 job->pre_fence = NULL;
266fail:
267 *wait_cmd = NULL;
268 *pre_fence = NULL;
269 return err;
270}
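/*
 * Note on the error path above: the clean_up_* labels unwind strictly in
 * reverse order of allocation (incr_cmd, post_fence, wait_cmd, pre_fence),
 * and the wait_cmd/incr_cmd pointers are only reset to NULL when they were
 * kzalloc'd in this function rather than taken from the channel's
 * pre-allocated job resources.
 */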
271
272static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
273 struct priv_cmd_entry *cmd)
274{
275 struct gk20a *g = c->g;
276 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
277 struct nvgpu_gpfifo x = {
278 .entry0 = u64_lo32(cmd->gva),
279 .entry1 = u64_hi32(cmd->gva) |
280 pbdma_gp_entry1_length_f(cmd->size)
281 };
282
283 nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
284 &x, sizeof(x));
285
286 if (cmd->mem->aperture == APERTURE_SYSMEM)
287 trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0,
288 cmd->mem->cpu_va + cmd->off * sizeof(u32));
289
290 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
291}
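/*
 * Illustration (hypothetical values): a priv cmdbuf at GPU VA 0x120021000
 * would be encoded as entry0 = 0x20021000 (low 32 bits of the VA) and
 * entry1 = 0x1 | pbdma_gp_entry1_length_f(cmd->size) (high VA bits plus the
 * length field), then written at byte offset c->gpfifo.put * sizeof(x) into
 * the gpfifo ring.
 */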
292
293/*
294 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
295 * splitting into two memcpys to handle wrap-around.
296 */
297static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
298 struct nvgpu_gpfifo *kern_gpfifo,
299 struct nvgpu_gpfifo __user *user_gpfifo,
300 u32 num_entries)
301{
302 /* byte offsets */
303 u32 gpfifo_size = c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo);
304 u32 len = num_entries * sizeof(struct nvgpu_gpfifo);
305 u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo);
306 u32 end = start + len; /* exclusive */
307 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
308 struct nvgpu_gpfifo *cpu_src;
309 int err;
310
311 if (user_gpfifo && !c->gpfifo.pipe) {
312 /*
313 * This path (from userspace to sysmem) is special in order to
314 * avoid two copies unnecessarily (from user to pipe, then from
315 * pipe to gpu sysmem buffer).
316 *
317 * As a special case, the pipe buffer exists if PRAMIN writes
318 * are forced, although the buffers may not be in vidmem in
319 * that case.
320 */
321 if (end > gpfifo_size) {
322 /* wrap-around */
323 int length0 = gpfifo_size - start;
324 int length1 = len - length0;
325 void __user *user2 = (u8 __user *)user_gpfifo + length0;
326
327 err = copy_from_user(gpfifo_mem->cpu_va + start,
328 user_gpfifo, length0);
329 if (err)
330 return err;
331
332 err = copy_from_user(gpfifo_mem->cpu_va,
333 user2, length1);
334 if (err)
335 return err;
336 } else {
337 err = copy_from_user(gpfifo_mem->cpu_va + start,
338 user_gpfifo, len);
339 if (err)
340 return err;
341 }
342
343 trace_write_pushbuffer_range(c, NULL, user_gpfifo,
344 0, num_entries);
345 goto out;
346 } else if (user_gpfifo) {
347 /* from userspace to vidmem or sysmem when pramin forced, use
348 * the common copy path below */
349 err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
350 if (err)
351 return err;
352
353 cpu_src = c->gpfifo.pipe;
354 } else {
355 /* from kernel to either sysmem or vidmem, don't need
356 * copy_from_user so use the common path below */
357 cpu_src = kern_gpfifo;
358 }
359
360 if (end > gpfifo_size) {
361 /* wrap-around */
362 int length0 = gpfifo_size - start;
363 int length1 = len - length0;
364 void *src2 = (u8 *)cpu_src + length0;
365
366 nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
367 nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
368 } else {
369 nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
370
371 }
372
373 trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
374
375out:
376 c->gpfifo.put = (c->gpfifo.put + num_entries) &
377 (c->gpfifo.entry_num - 1);
378
379 return 0;
380}
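/*
 * Wrap-around illustration (hypothetical values): with entry_num = 512 the
 * ring holds 512 * sizeof(struct nvgpu_gpfifo) bytes. Submitting
 * num_entries = 8 at put = 510 makes end exceed gpfifo_size, so length0
 * copies the 2 entries into slots 510-511 and length1 copies the remaining
 * 6 entries to the start of the ring; put then advances to
 * (510 + 8) & 511 = 6.
 */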
381
382int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
383 struct nvgpu_gpfifo *gpfifo,
384 struct nvgpu_submit_gpfifo_args *args,
385 u32 num_entries,
386 u32 flags,
387 struct nvgpu_fence *fence,
388 struct gk20a_fence **fence_out,
389 bool force_need_sync_fence,
390 struct fifo_profile_gk20a *profile)
391{
392 struct gk20a *g = c->g;
393 struct priv_cmd_entry *wait_cmd = NULL;
394 struct priv_cmd_entry *incr_cmd = NULL;
395 struct gk20a_fence *pre_fence = NULL;
396 struct gk20a_fence *post_fence = NULL;
397 struct channel_gk20a_job *job = NULL;
398 /* we might need two extra gpfifo entries - one for pre fence
399 * and one for post fence. */
400 const int extra_entries = 2;
401 bool skip_buffer_refcounting = (flags &
402 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
403 int err = 0;
404 bool need_job_tracking;
405 bool need_deferred_cleanup = false;
406 struct nvgpu_gpfifo __user *user_gpfifo = args ?
407 (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
408
409 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
410 return -ENODEV;
411
412 if (c->has_timedout)
413 return -ETIMEDOUT;
414
415 if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
416 return -ENOMEM;
417
418 /* fifo not large enough for request. Return error immediately.
419 * Kernel can insert gpfifo entries before and after user gpfifos.
 420 * So, add extra_entries to the user request. Also, HW with fifo size N
 421 * can accept only N-1 entries, hence the check below. */
422 if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
423 nvgpu_err(g, "not enough gpfifo space allocated");
424 return -ENOMEM;
425 }
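	/*
	 * Example (hypothetical values): with entry_num = 512 the HW accepts
	 * at most 511 entries, so a request of 510 user entries plus the 2
	 * kernel entries (wait + incr) already exceeds the fifo and is
	 * rejected here.
	 */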
426
427 if (!gpfifo && !args)
428 return -EINVAL;
429
430 if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
431 NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
432 !fence)
433 return -EINVAL;
434
435 /* an address space needs to have been bound at this point. */
436 if (!gk20a_channel_as_bound(c)) {
437 nvgpu_err(g,
438 "not bound to an address space at time of gpfifo"
439 " submission.");
440 return -EINVAL;
441 }
442
443 if (profile)
444 profile->timestamp[PROFILE_ENTRY] = sched_clock();
445
446 /* update debug settings */
447 nvgpu_ltc_sync_enabled(g);
448
449 gk20a_dbg_info("channel %d", c->chid);
450
451 /*
452 * Job tracking is necessary for any of the following conditions:
453 * - pre- or post-fence functionality
454 * - channel wdt
455 * - GPU rail-gating with non-deterministic channels
456 * - buffer refcounting
457 *
458 * If none of the conditions are met, then job tracking is not
459 * required and a fast submit can be done (ie. only need to write
460 * out userspace GPFIFO entries and update GP_PUT).
461 */
462 need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) ||
463 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) ||
464 c->wdt_enabled ||
465 (g->can_railgate && !c->deterministic) ||
466 !skip_buffer_refcounting;
467
468 if (need_job_tracking) {
469 bool need_sync_framework = false;
470
471 /*
472 * If the channel is to have deterministic latency and
473 * job tracking is required, the channel must have
 474 * pre-allocated resources. Otherwise, we fail the submit here.
475 */
476 if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
477 return -EINVAL;
478
479 need_sync_framework = force_need_sync_fence ||
480 gk20a_channel_sync_needs_sync_framework(g) ||
481 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
482 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
483 flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
484
485 /*
486 * Deferred clean-up is necessary for any of the following
487 * conditions:
488 * - channel's deterministic flag is not set
489 * - dependency on sync framework, which could make the
490 * behavior of the clean-up operation non-deterministic
491 * (should not be performed in the submit path)
492 * - channel wdt
493 * - GPU rail-gating with non-deterministic channels
494 * - buffer refcounting
495 *
496 * If none of the conditions are met, then deferred clean-up
 497 * is not required, and we clean up one job-tracking
498 * resource in the submit path.
499 */
500 need_deferred_cleanup = !c->deterministic ||
501 need_sync_framework ||
502 c->wdt_enabled ||
503 (g->can_railgate &&
504 !c->deterministic) ||
505 !skip_buffer_refcounting;
506
507 /*
 508 * For deterministic channels, we don't allow deferred clean-up
 509 * processing to occur. If we hit this case, we fail the submit.
510 */
511 if (c->deterministic && need_deferred_cleanup)
512 return -EINVAL;
513
514 if (!c->deterministic) {
515 /*
516 * Get a power ref unless this is a deterministic
517 * channel that holds them during the channel lifetime.
518 * This one is released by gk20a_channel_clean_up_jobs,
519 * via syncpt or sema interrupt, whichever is used.
520 */
521 err = gk20a_busy(g);
522 if (err) {
523 nvgpu_err(g,
524 "failed to host gk20a to submit gpfifo, process %s",
525 current->comm);
526 return err;
527 }
528 }
529
530 if (!need_deferred_cleanup) {
531 /* clean up a single job */
532 gk20a_channel_clean_up_jobs(c, false);
533 }
534 }
535
536
537 /* Grab access to HW to deal with do_idle */
538 if (c->deterministic)
539 nvgpu_rwsem_down_read(&g->deterministic_busy);
540
541 if (c->deterministic && c->deterministic_railgate_allowed) {
542 /*
543 * Nope - this channel has dropped its own power ref. As
544 * deterministic submits don't hold power on per each submitted
545 * job like normal ones do, the GPU might railgate any time now
546 * and thus submit is disallowed.
547 */
548 err = -EINVAL;
549 goto clean_up;
550 }
551
552 trace_gk20a_channel_submit_gpfifo(g->name,
553 c->chid,
554 num_entries,
555 flags,
556 fence ? fence->id : 0,
557 fence ? fence->value : 0);
558
559 gk20a_dbg_info("pre-submit put %d, get %d, size %d",
560 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
561
562 /*
563 * Make sure we have enough space for gpfifo entries. Check cached
564 * values first and then read from HW. If no space, return EAGAIN
 565 * and let userspace decide whether to retry the request.
566 */
567 if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
568 if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
569 err = -EAGAIN;
570 goto clean_up;
571 }
572 }
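	/*
	 * Per the comment above, the outer check uses the cached free count
	 * so the common case avoids touching HW; only when that looks
	 * insufficient does nvgpu_get_gp_free_count() re-read from HW before
	 * -EAGAIN is returned.
	 */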
573
574 if (c->has_timedout) {
575 err = -ETIMEDOUT;
576 goto clean_up;
577 }
578
579 if (need_job_tracking) {
580 err = channel_gk20a_alloc_job(c, &job);
581 if (err)
582 goto clean_up;
583
584 err = gk20a_submit_prepare_syncs(c, fence, job,
585 &wait_cmd, &incr_cmd,
586 &pre_fence, &post_fence,
587 force_need_sync_fence,
588 need_deferred_cleanup,
589 flags);
590 if (err)
591 goto clean_up_job;
592 }
593
594 if (profile)
595 profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock();
596
597 if (wait_cmd)
598 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
599
600 if (gpfifo || user_gpfifo)
601 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
602 num_entries);
603 if (err)
604 goto clean_up_job;
605
606 /*
607 * And here's where we add the incr_cmd we generated earlier. It should
608 * always run!
609 */
610 if (incr_cmd)
611 gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
612
613 if (fence_out)
614 *fence_out = gk20a_fence_get(post_fence);
615
616 if (need_job_tracking)
617 /* TODO! Check for errors... */
618 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
619 if (profile)
620 profile->timestamp[PROFILE_APPEND] = sched_clock();
621
622 g->ops.fifo.userd_gp_put(g, c);
623
624 if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & flags) &&
625 g->ops.fifo.reschedule_runlist)
626 g->ops.fifo.reschedule_runlist(g, c->runlist_id);
627
628 /* No hw access beyond this point */
629 if (c->deterministic)
630 nvgpu_rwsem_up_read(&g->deterministic_busy);
631
632 trace_gk20a_channel_submitted_gpfifo(g->name,
633 c->chid,
634 num_entries,
635 flags,
636 post_fence ? post_fence->syncpt_id : 0,
637 post_fence ? post_fence->syncpt_value : 0);
638
639 gk20a_dbg_info("post-submit put %d, get %d, size %d",
640 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
641
642 if (profile)
643 profile->timestamp[PROFILE_END] = sched_clock();
644 gk20a_dbg_fn("done");
645 return err;
646
647clean_up_job:
648 channel_gk20a_free_job(c, job);
649clean_up:
650 gk20a_dbg_fn("fail");
651 gk20a_fence_put(pre_fence);
652 gk20a_fence_put(post_fence);
653 if (c->deterministic)
654 nvgpu_rwsem_up_read(&g->deterministic_busy);
655 else if (need_deferred_cleanup)
656 gk20a_idle(g);
657
658 return err;
659}
660