Diffstat (limited to 'drivers/gpu/nvgpu/os/linux/channel.c')
-rw-r--r--  drivers/gpu/nvgpu/os/linux/channel.c  1021
1 file changed, 1021 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c
new file mode 100644
index 00000000..7810bc21
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/channel.c
@@ -0,0 +1,1021 @@
1/*
2 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/enabled.h>
18#include <nvgpu/debug.h>
19#include <nvgpu/ltc.h>
20#include <nvgpu/error_notifier.h>
21#include <nvgpu/os_sched.h>
22
23/*
24 * This is required for nvgpu_vm_find_buf() which is used in the tracing
25 * code. Once we can get and access userspace buffers without requiring
26 * direct dma_buf usage this can be removed.
27 */
28#include <nvgpu/linux/vm.h>
29
30#include "gk20a/gk20a.h"
31
32#include "channel.h"
33#include "ioctl_channel.h"
34#include "os_linux.h"
35
36#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
37
38#include <linux/uaccess.h>
39#include <linux/dma-buf.h>
40#include <trace/events/gk20a.h>
41#include <uapi/linux/nvgpu.h>
42
43#include "sync_sema_android.h"
44
45u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
46{
47 u32 flags = 0;
48
49 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT)
50 flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT;
51
52 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
53 flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
54
55 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT)
56 flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT;
57
58 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
59 flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE;
60
61 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI)
62 flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI;
63
64 if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING)
65 flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING;
66
67 return flags;
68}
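An illustrative sketch (not part of this commit) of how the ioctl layer might use this translation; "args" mirrors struct nvgpu_submit_gpfifo_args from the UAPI header and the helper name is made up:

static u32 example_translate_submit_flags(struct nvgpu_submit_gpfifo_args *args)
{
	/* Common code only ever sees NVGPU_SUBMIT_FLAGS_* values. */
	return nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
}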
69
70/*
 71 * Convert error_notifiers used in common code, of the form
 72 * NVGPU_ERR_NOTIFIER_*, into the Linux-specific error_notifiers of the
 73 * form NVGPU_CHANNEL_* that are exposed to user space.
74 */
75static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
76{
77 switch (error_notifier) {
78 case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
79 return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
80 case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
81 return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
82 case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
83 return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
84 case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
85 return NVGPU_CHANNEL_GR_EXCEPTION;
86 case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
87 return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
88 case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
89 return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
90 case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
91 return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
92 case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
93 return NVGPU_CHANNEL_PBDMA_ERROR;
94 case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
95 return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
96 case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
97 return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
98 case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
99 return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
100 }
101
102 pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
103
104 return error_notifier;
105}
106
107/**
108 * nvgpu_set_error_notifier_locked()
 109 * Should be called with the channel's error_notifier mutex (the
 110 * error_notifier.mutex in ch->os_priv) held.
111 * error should be of the form NVGPU_ERR_NOTIFIER_*
112 */
113void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
114{
115 struct nvgpu_channel_linux *priv = ch->os_priv;
116
117 error = nvgpu_error_notifier_to_channel_notifier(error);
118
119 if (priv->error_notifier.dmabuf) {
120 struct nvgpu_notification *notification =
121 priv->error_notifier.notification;
122 struct timespec time_data;
123 u64 nsec;
124
125 getnstimeofday(&time_data);
126 nsec = ((u64)time_data.tv_sec) * 1000000000u +
127 (u64)time_data.tv_nsec;
128 notification->time_stamp.nanoseconds[0] =
129 (u32)nsec;
130 notification->time_stamp.nanoseconds[1] =
131 (u32)(nsec >> 32);
132 notification->info32 = error;
133 notification->status = 0xffff;
134
135 nvgpu_err(ch->g,
136 "error notifier set to %d for ch %d", error, ch->chid);
137 }
138}
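A sketch of the consumer side (assumed, not part of this file): a reader of the notification buffer can rebuild the 64-bit timestamp that the code above splits across two 32-bit words:

static inline u64 example_notification_timestamp_ns(
		const struct nvgpu_notification *n)
{
	/* nanoseconds[0] holds the low 32 bits, nanoseconds[1] the high. */
	return (u64)n->time_stamp.nanoseconds[0] |
	       ((u64)n->time_stamp.nanoseconds[1] << 32);
}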
139
140/* error should be of the form NVGPU_ERR_NOTIFIER_* */
141void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
142{
143 struct nvgpu_channel_linux *priv = ch->os_priv;
144
145 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
146 nvgpu_set_error_notifier_locked(ch, error);
147 nvgpu_mutex_release(&priv->error_notifier.mutex);
148}
149
150void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
151{
152 struct nvgpu_channel_linux *priv = ch->os_priv;
153
154 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
155 if (priv->error_notifier.dmabuf) {
156 struct nvgpu_notification *notification =
157 priv->error_notifier.notification;
158
159 /* Don't overwrite error flag if it is already set */
160 if (notification->status != 0xffff)
161 nvgpu_set_error_notifier_locked(ch, error);
162 }
163 nvgpu_mutex_release(&priv->error_notifier.mutex);
164}
165
166/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */
167bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
168{
169 struct nvgpu_channel_linux *priv = ch->os_priv;
170 bool notifier_set = false;
171
172 error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
173
174 nvgpu_mutex_acquire(&priv->error_notifier.mutex);
175 if (priv->error_notifier.dmabuf) {
176 struct nvgpu_notification *notification =
177 priv->error_notifier.notification;
178 u32 err = notification->info32;
179
180 if (err == error_notifier)
181 notifier_set = true;
182 }
183 nvgpu_mutex_release(&priv->error_notifier.mutex);
184
185 return notifier_set;
186}
187
188static void gk20a_channel_update_runcb_fn(struct work_struct *work)
189{
190 struct nvgpu_channel_completion_cb *completion_cb =
191 container_of(work, struct nvgpu_channel_completion_cb, work);
192 struct nvgpu_channel_linux *priv =
193 container_of(completion_cb,
194 struct nvgpu_channel_linux, completion_cb);
195 struct channel_gk20a *ch = priv->ch;
196 void (*fn)(struct channel_gk20a *, void *);
197 void *user_data;
198
199 nvgpu_spinlock_acquire(&completion_cb->lock);
200 fn = completion_cb->fn;
201 user_data = completion_cb->user_data;
202 nvgpu_spinlock_release(&completion_cb->lock);
203
204 if (fn)
205 fn(ch, user_data);
206}
207
208static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch)
209{
210 struct nvgpu_channel_linux *priv = ch->os_priv;
211
212 priv->completion_cb.fn = NULL;
213 priv->completion_cb.user_data = NULL;
214 nvgpu_spinlock_init(&priv->completion_cb.lock);
215 INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn);
216}
217
218static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch)
219{
220 struct nvgpu_channel_linux *priv = ch->os_priv;
221
222 nvgpu_spinlock_acquire(&priv->completion_cb.lock);
223 priv->completion_cb.fn = NULL;
224 priv->completion_cb.user_data = NULL;
225 nvgpu_spinlock_release(&priv->completion_cb.lock);
226 cancel_work_sync(&priv->completion_cb.work);
227}
228
229static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch)
230{
231 struct nvgpu_channel_linux *priv = ch->os_priv;
232
233 if (priv->completion_cb.fn)
234 schedule_work(&priv->completion_cb.work);
235}
236
237static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch)
238{
239 struct nvgpu_channel_linux *priv = ch->os_priv;
240
241 if (priv->completion_cb.fn)
242 cancel_work_sync(&priv->completion_cb.work);
243}
244
245struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
246 void (*update_fn)(struct channel_gk20a *, void *),
247 void *update_fn_data,
248 int runlist_id,
249 bool is_privileged_channel)
250{
251 struct channel_gk20a *ch;
252 struct nvgpu_channel_linux *priv;
253
254 ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel,
255 nvgpu_current_pid(g), nvgpu_current_tid(g));
256
257 if (ch) {
258 priv = ch->os_priv;
259 nvgpu_spinlock_acquire(&priv->completion_cb.lock);
260 priv->completion_cb.fn = update_fn;
261 priv->completion_cb.user_data = update_fn_data;
262 nvgpu_spinlock_release(&priv->completion_cb.lock);
263 }
264
265 return ch;
266}
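A hedged usage sketch (names prefixed example_ are made up, <linux/completion.h> assumed): an in-kernel client that wants a callback when work on its channel completes. The callback runs from the completion_cb work item scheduled by nvgpu_channel_work_completion_signal(), i.e. in process context rather than from the interrupt path:

static void example_channel_update(struct channel_gk20a *ch, void *data)
{
	struct completion *done = data;

	/* Wake whoever is tracking this channel's progress. */
	complete(done);
}

static struct channel_gk20a *example_open(struct gk20a *g,
		struct completion *done)
{
	/* Runlist selection is caller policy; 0 here is purely illustrative. */
	return gk20a_open_new_channel_with_cb(g, example_channel_update,
			done, 0, false);
}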
267
268static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
269{
270}
271
272static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
273{
274 nvgpu_channel_work_completion_clear(ch);
275
276#if defined(CONFIG_GK20A_CYCLE_STATS)
277 gk20a_channel_free_cycle_stats_buffer(ch);
278 gk20a_channel_free_cycle_stats_snapshot(ch);
279#endif
280}
281
282static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
283{
284 struct nvgpu_channel_linux *priv;
285 int err;
286
287 priv = nvgpu_kzalloc(g, sizeof(*priv));
288 if (!priv)
289 return -ENOMEM;
290
291 ch->os_priv = priv;
292 priv->ch = ch;
293
294#ifdef CONFIG_SYNC
295 ch->has_os_fence_framework_support = true;
296#endif
297
298 err = nvgpu_mutex_init(&priv->error_notifier.mutex);
299 if (err) {
300 nvgpu_kfree(g, priv);
301 return err;
302 }
303
304 nvgpu_channel_work_completion_init(ch);
305
306 return 0;
307}
308
309static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
310{
311 struct nvgpu_channel_linux *priv = ch->os_priv;
312
313 nvgpu_mutex_destroy(&priv->error_notifier.mutex);
314 nvgpu_kfree(g, priv);
315
316 ch->os_priv = NULL;
317
318#ifdef CONFIG_SYNC
319 ch->has_os_fence_framework_support = false;
320#endif
321}
322
323static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch,
324 const char *fmt, ...)
325{
326 struct nvgpu_channel_linux *priv = ch->os_priv;
327 struct nvgpu_os_fence_framework *fence_framework;
328 char name[30];
329 va_list args;
330
331 fence_framework = &priv->fence_framework;
332
333 va_start(args, fmt);
334 vsnprintf(name, sizeof(name), fmt, args);
335 va_end(args);
336
337 fence_framework->timeline = gk20a_sync_timeline_create(name);
338
339 if (!fence_framework->timeline)
340 return -EINVAL;
341
342 return 0;
343}
344static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch)
345{
346 struct nvgpu_channel_linux *priv = ch->os_priv;
347 struct nvgpu_os_fence_framework *fence_framework;
348
349 fence_framework = &priv->fence_framework;
350
351 gk20a_sync_timeline_signal(fence_framework->timeline);
352}
353
354static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch)
355{
356 struct nvgpu_channel_linux *priv = ch->os_priv;
357 struct nvgpu_os_fence_framework *fence_framework;
358
359 fence_framework = &priv->fence_framework;
360
361 gk20a_sync_timeline_destroy(fence_framework->timeline);
362 fence_framework->timeline = NULL;
363}
364
365static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch)
366{
367 struct nvgpu_channel_linux *priv = ch->os_priv;
368 struct nvgpu_os_fence_framework *fence_framework;
369
370 fence_framework = &priv->fence_framework;
371
372 return (fence_framework->timeline != NULL);
373}
374
375int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
376{
377 struct gk20a *g = &l->g;
378 struct fifo_gk20a *f = &g->fifo;
379 int chid;
380 int err;
381
382 for (chid = 0; chid < (int)f->num_channels; chid++) {
383 struct channel_gk20a *ch = &f->channel[chid];
384
385 err = nvgpu_channel_alloc_linux(g, ch);
386 if (err)
387 goto err_clean;
388 }
389
390 g->os_channel.open = nvgpu_channel_open_linux;
391 g->os_channel.close = nvgpu_channel_close_linux;
392 g->os_channel.work_completion_signal =
393 nvgpu_channel_work_completion_signal;
394 g->os_channel.work_completion_cancel_sync =
395 nvgpu_channel_work_completion_cancel_sync;
396
397 g->os_channel.os_fence_framework_inst_exists =
398 nvgpu_channel_fence_framework_exists;
399 g->os_channel.init_os_fence_framework =
400 nvgpu_channel_init_os_fence_framework;
401 g->os_channel.signal_os_fence_framework =
402 nvgpu_channel_signal_os_fence_framework;
403 g->os_channel.destroy_os_fence_framework =
404 nvgpu_channel_destroy_os_fence_framework;
405
406 return 0;
407
408err_clean:
409 for (; chid >= 0; chid--) {
410 struct channel_gk20a *ch = &f->channel[chid];
411
412 nvgpu_channel_free_linux(g, ch);
413 }
414 return err;
415}
416
417void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l)
418{
419 struct gk20a *g = &l->g;
420 struct fifo_gk20a *f = &g->fifo;
421 unsigned int chid;
422
423 for (chid = 0; chid < f->num_channels; chid++) {
424 struct channel_gk20a *ch = &f->channel[chid];
425
426 nvgpu_channel_free_linux(g, ch);
427 }
428
429 g->os_channel.os_fence_framework_inst_exists = NULL;
430 g->os_channel.init_os_fence_framework = NULL;
431 g->os_channel.signal_os_fence_framework = NULL;
432 g->os_channel.destroy_os_fence_framework = NULL;
433}
434
435u32 nvgpu_get_gpfifo_entry_size(void)
436{
437 return sizeof(struct nvgpu_gpfifo_entry);
438}
439
440#ifdef CONFIG_DEBUG_FS
441static void trace_write_pushbuffer(struct channel_gk20a *c,
442 struct nvgpu_gpfifo_entry *g)
443{
444 void *mem = NULL;
445 unsigned int words;
446 u64 offset;
447 struct dma_buf *dmabuf = NULL;
448
449 if (gk20a_debug_trace_cmdbuf) {
450 u64 gpu_va = (u64)g->entry0 |
451 (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
452 int err;
453
454 words = pbdma_gp_entry1_length_v(g->entry1);
455 err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
456 if (!err)
457 mem = dma_buf_vmap(dmabuf);
458 }
459
460 if (mem) {
461 u32 i;
462 /*
 463		 * Write in batches of 128 words, as there seems to be a limit
 464		 * on how much data can be output to ftrace at once.
465 */
466 for (i = 0; i < words; i += 128U) {
467 trace_gk20a_push_cmdbuf(
468 c->g->name,
469 0,
470 min(words - i, 128U),
471 offset + i * sizeof(u32),
472 mem);
473 }
474 dma_buf_vunmap(dmabuf, mem);
475 }
476}
477#endif
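To make the batching above concrete (illustrative numbers): for words = 300 the loop emits three trace records of min(words - i, 128U) = 128, 128 and 44 words, at byte offsets offset + 0, offset + 512 and offset + 1024, since i counts u32 words and each 128-word step advances the offset by 512 bytes.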
478
479static void trace_write_pushbuffer_range(struct channel_gk20a *c,
480 struct nvgpu_gpfifo_entry *g,
481 struct nvgpu_gpfifo_entry __user *user_gpfifo,
482 int offset,
483 int count)
484{
485#ifdef CONFIG_DEBUG_FS
486 u32 size;
487 int i;
488 struct nvgpu_gpfifo_entry *gp;
489 bool gpfifo_allocated = false;
490
491 if (!gk20a_debug_trace_cmdbuf)
492 return;
493
494 if (!g && !user_gpfifo)
495 return;
496
497 if (!g) {
498 size = count * sizeof(struct nvgpu_gpfifo_entry);
499 if (size) {
500 g = nvgpu_big_malloc(c->g, size);
501 if (!g)
502 return;
503
504 if (copy_from_user(g, user_gpfifo, size)) {
505 nvgpu_big_free(c->g, g);
506 return;
507 }
508 }
509 gpfifo_allocated = true;
510 }
511
512 gp = g + offset;
513 for (i = 0; i < count; i++, gp++)
514 trace_write_pushbuffer(c, gp);
515
516 if (gpfifo_allocated)
517 nvgpu_big_free(c->g, g);
518#endif
519}
520
521/*
522 * Handle the submit synchronization - pre-fences and post-fences.
523 */
524static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
525 struct nvgpu_channel_fence *fence,
526 struct channel_gk20a_job *job,
527 struct priv_cmd_entry **wait_cmd,
528 struct priv_cmd_entry **incr_cmd,
529 struct gk20a_fence **post_fence,
530 bool register_irq,
531 u32 flags)
532{
533 struct gk20a *g = c->g;
534 bool need_sync_fence = false;
535 bool new_sync_created = false;
536 int wait_fence_fd = -1;
537 int err = 0;
538 bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI);
539 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
540
541 if (g->aggressive_sync_destroy_thresh) {
542 nvgpu_mutex_acquire(&c->sync_lock);
543 if (!c->sync) {
544 c->sync = gk20a_channel_sync_create(c, false);
545 if (!c->sync) {
546 err = -ENOMEM;
547 nvgpu_mutex_release(&c->sync_lock);
548 goto fail;
549 }
550 new_sync_created = true;
551 }
552 nvgpu_atomic_inc(&c->sync->refcount);
553 nvgpu_mutex_release(&c->sync_lock);
554 }
555
556 if (g->ops.fifo.resetup_ramfc && new_sync_created) {
557 err = g->ops.fifo.resetup_ramfc(c);
558 if (err)
559 goto fail;
560 }
561
562 /*
563 * Optionally insert syncpt/semaphore wait in the beginning of gpfifo
564 * submission when user requested and the wait hasn't expired.
565 */
566 if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) {
567 int max_wait_cmds = c->deterministic ? 1 : 0;
568
569 if (!pre_alloc_enabled)
570 job->wait_cmd = nvgpu_kzalloc(g,
571 sizeof(struct priv_cmd_entry));
572
573 if (!job->wait_cmd) {
574 err = -ENOMEM;
575 goto fail;
576 }
577
578 if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
579 wait_fence_fd = fence->id;
580 err = c->sync->wait_fd(c->sync, wait_fence_fd,
581 job->wait_cmd, max_wait_cmds);
582 } else {
583 err = c->sync->wait_syncpt(c->sync, fence->id,
584 fence->value,
585 job->wait_cmd);
586 }
587
588 if (err)
589 goto clean_up_wait_cmd;
590
591 if (job->wait_cmd->valid)
592 *wait_cmd = job->wait_cmd;
593 }
594
595 if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) &&
596 (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE))
597 need_sync_fence = true;
598
599 /*
600 * Always generate an increment at the end of a GPFIFO submission. This
601 * is used to keep track of method completion for idle railgating. The
602 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
603 */
604 job->post_fence = gk20a_alloc_fence(c);
605 if (!job->post_fence) {
606 err = -ENOMEM;
607 goto clean_up_wait_cmd;
608 }
609 if (!pre_alloc_enabled)
610 job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry));
611
612 if (!job->incr_cmd) {
613 err = -ENOMEM;
614 goto clean_up_post_fence;
615 }
616
617 if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
618 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
619 job->post_fence, need_wfi, need_sync_fence,
620 register_irq);
621 else
622 err = c->sync->incr(c->sync, job->incr_cmd,
623 job->post_fence, need_sync_fence,
624 register_irq);
625 if (!err) {
626 *incr_cmd = job->incr_cmd;
627 *post_fence = job->post_fence;
628 } else
629 goto clean_up_incr_cmd;
630
631 return 0;
632
633clean_up_incr_cmd:
634 free_priv_cmdbuf(c, job->incr_cmd);
635 if (!pre_alloc_enabled)
636 job->incr_cmd = NULL;
637clean_up_post_fence:
638 gk20a_fence_put(job->post_fence);
639 job->post_fence = NULL;
640clean_up_wait_cmd:
641 free_priv_cmdbuf(c, job->wait_cmd);
642 if (!pre_alloc_enabled)
643 job->wait_cmd = NULL;
644fail:
645 *wait_cmd = NULL;
646 return err;
647}
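The wait and increment commands prepared here are consumed by gk20a_submit_channel_gpfifo() below, which appends them around the caller's entries. Roughly, one tracked submit occupies the ring as follows (illustrative):

	[ wait_cmd ][ user/kernel gpfifo entries ... ][ incr_cmd ]
	  optional, only with                           always appended for
	  NVGPU_SUBMIT_FLAGS_FENCE_WAIT                 tracked jobs

This is also why the submit path reserves extra_entries = 2 slots on top of the caller's num_entries.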
648
649static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
650 struct priv_cmd_entry *cmd)
651{
652 struct gk20a *g = c->g;
653 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
654 struct nvgpu_gpfifo_entry x = {
655 .entry0 = u64_lo32(cmd->gva),
656 .entry1 = u64_hi32(cmd->gva) |
657 pbdma_gp_entry1_length_f(cmd->size)
658 };
659
660 nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
661 &x, sizeof(x));
662
663 if (cmd->mem->aperture == APERTURE_SYSMEM)
664 trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0,
665 cmd->mem->cpu_va + cmd->off * sizeof(u32));
666
667 c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
668}
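A worked example of the packing above (address chosen for illustration): for a priv cmdbuf at GPU VA 0x120400000 with cmd->size = 8 words,

	entry0 = u64_lo32(0x120400000) = 0x20400000
	entry1 = u64_hi32(0x120400000) | pbdma_gp_entry1_length_f(8)
	       = 0x00000001 | (length field, shifted as defined in
	                       hw_pbdma_gk20a.h)

so the low 32 bits of the GPU VA live in entry0 while the high bits share entry1 with the entry length.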
669
670/*
671 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
672 * splitting into two memcpys to handle wrap-around.
673 */
674static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
675 struct nvgpu_gpfifo_entry *kern_gpfifo,
676 struct nvgpu_gpfifo_entry __user *user_gpfifo,
677 u32 num_entries)
678{
679 /* byte offsets */
680 u32 gpfifo_size =
681 c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
682 u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
683 u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
684 u32 end = start + len; /* exclusive */
685 struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
686 struct nvgpu_gpfifo_entry *cpu_src;
687 int err;
688
689 if (user_gpfifo && !c->gpfifo.pipe) {
690 /*
 691		 * This path (from userspace to sysmem) is special: it avoids an
 692		 * unnecessary double copy (from user to pipe, then from pipe to
 693		 * the GPU sysmem buffer).
694 */
695 if (end > gpfifo_size) {
696 /* wrap-around */
697 int length0 = gpfifo_size - start;
698 int length1 = len - length0;
699 void __user *user2 = (u8 __user *)user_gpfifo + length0;
700
701 err = copy_from_user(gpfifo_mem->cpu_va + start,
702 user_gpfifo, length0);
703 if (err)
704 return err;
705
706 err = copy_from_user(gpfifo_mem->cpu_va,
707 user2, length1);
708 if (err)
709 return err;
710 } else {
711 err = copy_from_user(gpfifo_mem->cpu_va + start,
712 user_gpfifo, len);
713 if (err)
714 return err;
715 }
716
717 trace_write_pushbuffer_range(c, NULL, user_gpfifo,
718 0, num_entries);
719 goto out;
720 } else if (user_gpfifo) {
721 /* from userspace to vidmem, use the common copy path below */
722 err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
723 if (err)
724 return err;
725
726 cpu_src = c->gpfifo.pipe;
727 } else {
728 /* from kernel to either sysmem or vidmem, don't need
729 * copy_from_user so use the common path below */
730 cpu_src = kern_gpfifo;
731 }
732
733 if (end > gpfifo_size) {
734 /* wrap-around */
735 int length0 = gpfifo_size - start;
736 int length1 = len - length0;
737 void *src2 = (u8 *)cpu_src + length0;
738
739 nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
740 nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
741 } else {
742 nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
743
744 }
745
746 trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
747
748out:
749 c->gpfifo.put = (c->gpfifo.put + num_entries) &
750 (c->gpfifo.entry_num - 1);
751
752 return 0;
753}
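A worked example of the wrap-around split above (illustrative numbers): with entry_num = 1024 and 8-byte entries, gpfifo_size = 8192. A submit of num_entries = 100 (len = 800) starting at put = 1000 (start = 8000) has end = 8800 > 8192, so it is written in two pieces:

	length0 = 8192 - 8000 = 192 bytes at offset 8000   (24 entries)
	length1 =  800 -  192 = 608 bytes at offset 0      (76 entries)

and put then advances to (1000 + 100) & (1024 - 1) = 76.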
754
755int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
756 struct nvgpu_gpfifo_entry *gpfifo,
757 struct nvgpu_submit_gpfifo_args *args,
758 u32 num_entries,
759 u32 flags,
760 struct nvgpu_channel_fence *fence,
761 struct gk20a_fence **fence_out,
762 struct fifo_profile_gk20a *profile)
763{
764 struct gk20a *g = c->g;
765 struct priv_cmd_entry *wait_cmd = NULL;
766 struct priv_cmd_entry *incr_cmd = NULL;
767 struct gk20a_fence *post_fence = NULL;
768 struct channel_gk20a_job *job = NULL;
 769	/* We might need two extra gpfifo entries - one for the pre-fence
 770	 * and one for the post-fence. */
771 const int extra_entries = 2;
772 bool skip_buffer_refcounting = (flags &
773 NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING);
774 int err = 0;
775 bool need_job_tracking;
776 bool need_deferred_cleanup = false;
777 struct nvgpu_gpfifo_entry __user *user_gpfifo = args ?
778 (struct nvgpu_gpfifo_entry __user *)(uintptr_t)args->gpfifo : NULL;
779
780 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
781 return -ENODEV;
782
783 if (c->has_timedout)
784 return -ETIMEDOUT;
785
786 if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
787 return -ENOMEM;
788
 789	/* The fifo is not large enough for the request; return an error
 790	 * immediately. The kernel can insert gpfifo entries before and after
 791	 * the user gpfifos, so add extra_entries to the user request. Also, HW
 792	 * with a fifo of size N can accept only N-1 entries, hence the check below. */
793 if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
794 nvgpu_err(g, "not enough gpfifo space allocated");
795 return -ENOMEM;
796 }
797
798 if (!gpfifo && !args)
799 return -EINVAL;
800
801 if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
802 NVGPU_SUBMIT_FLAGS_FENCE_GET)) &&
803 !fence)
804 return -EINVAL;
805
806 /* an address space needs to have been bound at this point. */
807 if (!gk20a_channel_as_bound(c)) {
808 nvgpu_err(g,
809 "not bound to an address space at time of gpfifo"
810 " submission.");
811 return -EINVAL;
812 }
813
814 gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY);
815
816 /* update debug settings */
817 nvgpu_ltc_sync_enabled(g);
818
819 nvgpu_log_info(g, "channel %d", c->chid);
820
821 /*
822 * Job tracking is necessary for any of the following conditions:
823 * - pre- or post-fence functionality
824 * - channel wdt
825 * - GPU rail-gating with non-deterministic channels
826 * - buffer refcounting
827 *
828 * If none of the conditions are met, then job tracking is not
 829	 * required and a fast submit can be done (i.e. only need to write
830 * out userspace GPFIFO entries and update GP_PUT).
831 */
832 need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) ||
833 (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) ||
834 c->timeout.enabled ||
835 (g->can_railgate && !c->deterministic) ||
836 !skip_buffer_refcounting;
837
838 if (need_job_tracking) {
839 bool need_sync_framework = false;
840
841 /*
842 * If the channel is to have deterministic latency and
843 * job tracking is required, the channel must have
 844		 * pre-allocated resources. Otherwise, we fail the submit here.
845 */
846 if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
847 return -EINVAL;
848
849 need_sync_framework =
850 gk20a_channel_sync_needs_sync_framework(g) ||
851 (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE &&
852 flags & NVGPU_SUBMIT_FLAGS_FENCE_GET);
853
854 /*
855 * Deferred clean-up is necessary for any of the following
856 * conditions:
857 * - channel's deterministic flag is not set
858 * - dependency on sync framework, which could make the
859 * behavior of the clean-up operation non-deterministic
860 * (should not be performed in the submit path)
861 * - channel wdt
862 * - GPU rail-gating with non-deterministic channels
863 * - buffer refcounting
864 *
865 * If none of the conditions are met, then deferred clean-up
866 * is not required, and we clean-up one job-tracking
867 * resource in the submit path.
868 */
869 need_deferred_cleanup = !c->deterministic ||
870 need_sync_framework ||
871 c->timeout.enabled ||
872 (g->can_railgate &&
873 !c->deterministic) ||
874 !skip_buffer_refcounting;
875
876 /*
 877		 * For deterministic channels, we don't allow deferred clean-up
 878		 * processing to occur. If we hit this case, we fail the submit.
879 */
880 if (c->deterministic && need_deferred_cleanup)
881 return -EINVAL;
882
883 if (!c->deterministic) {
884 /*
885 * Get a power ref unless this is a deterministic
886 * channel that holds them during the channel lifetime.
887 * This one is released by gk20a_channel_clean_up_jobs,
888 * via syncpt or sema interrupt, whichever is used.
889 */
890 err = gk20a_busy(g);
891 if (err) {
892 nvgpu_err(g,
893 "failed to host gk20a to submit gpfifo, process %s",
894 current->comm);
895 return err;
896 }
897 }
898
899 if (!need_deferred_cleanup) {
900 /* clean up a single job */
901 gk20a_channel_clean_up_jobs(c, false);
902 }
903 }
904
905
906 /* Grab access to HW to deal with do_idle */
907 if (c->deterministic)
908 nvgpu_rwsem_down_read(&g->deterministic_busy);
909
910 if (c->deterministic && c->deterministic_railgate_allowed) {
911 /*
912 * Nope - this channel has dropped its own power ref. As
913 * deterministic submits don't hold power on per each submitted
914 * job like normal ones do, the GPU might railgate any time now
915 * and thus submit is disallowed.
916 */
917 err = -EINVAL;
918 goto clean_up;
919 }
920
921 trace_gk20a_channel_submit_gpfifo(g->name,
922 c->chid,
923 num_entries,
924 flags,
925 fence ? fence->id : 0,
926 fence ? fence->value : 0);
927
928 nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
929 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
930
931 /*
932 * Make sure we have enough space for gpfifo entries. Check cached
933 * values first and then read from HW. If no space, return EAGAIN
 934	 * and let userspace decide whether or not to retry the request.
935 */
936 if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
937 if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
938 err = -EAGAIN;
939 goto clean_up;
940 }
941 }
942
943 if (c->has_timedout) {
944 err = -ETIMEDOUT;
945 goto clean_up;
946 }
947
948 if (need_job_tracking) {
949 err = channel_gk20a_alloc_job(c, &job);
950 if (err)
951 goto clean_up;
952
953 err = gk20a_submit_prepare_syncs(c, fence, job,
954 &wait_cmd, &incr_cmd,
955 &post_fence,
956 need_deferred_cleanup,
957 flags);
958 if (err)
959 goto clean_up_job;
960 }
961
962 gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING);
963
964 if (wait_cmd)
965 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
966
967 if (gpfifo || user_gpfifo)
968 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
969 num_entries);
970 if (err)
971 goto clean_up_job;
972
973 /*
974 * And here's where we add the incr_cmd we generated earlier. It should
975 * always run!
976 */
977 if (incr_cmd)
978 gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
979
980 if (fence_out)
981 *fence_out = gk20a_fence_get(post_fence);
982
983 if (need_job_tracking)
984 /* TODO! Check for errors... */
985 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
986 gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND);
987
988 g->ops.fifo.userd_gp_put(g, c);
989
990 /* No hw access beyond this point */
991 if (c->deterministic)
992 nvgpu_rwsem_up_read(&g->deterministic_busy);
993
994 trace_gk20a_channel_submitted_gpfifo(g->name,
995 c->chid,
996 num_entries,
997 flags,
998 post_fence ? post_fence->syncpt_id : 0,
999 post_fence ? post_fence->syncpt_value : 0);
1000
1001 nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
1002 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1003
1004 gk20a_fifo_profile_snapshot(profile, PROFILE_END);
1005
1006 nvgpu_log_fn(g, "done");
1007 return err;
1008
1009clean_up_job:
1010 channel_gk20a_free_job(c, job);
1011clean_up:
1012 nvgpu_log_fn(g, "fail");
1013 gk20a_fence_put(post_fence);
1014 if (c->deterministic)
1015 nvgpu_rwsem_up_read(&g->deterministic_busy);
1016 else if (need_deferred_cleanup)
1017 gk20a_idle(g);
1018
1019 return err;
1020}
1021
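Finally, a worked example of the sizing rule enforced near the top of gk20a_submit_channel_gpfifo() (illustrative numbers): with entry_num = 1024 the hardware ring can keep at most entry_num - 1 = 1023 entries in flight, and a tracked submit may add extra_entries = 2 priv cmdbuf entries (wait + incr). A request of num_entries = 1022 therefore fails the "entry_num - 1 < num_entries + extra_entries" check (1023 < 1024), while up to 1021 user entries can be accepted provided nvgpu_gp_free_count()/nvgpu_get_gp_free_count() report enough free space at submit time.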