path: root/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  942
1 file changed, 942 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
new file mode 100644
index 00000000..d83684e4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -0,0 +1,942 @@
/*
 * GK20A Channel Synchronization Abstraction
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/semaphore.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/list.h>
#include <nvgpu/nvhost.h>

#include "channel_sync_gk20a.h"
#include "gk20a.h"
#include "fence_gk20a.h"
#include "sync_gk20a.h"
#include "mm_gk20a.h"

#ifdef CONFIG_SYNC
#include "../drivers/staging/android/sync.h"
#endif

#ifdef CONFIG_TEGRA_GK20A_NVHOST

struct gk20a_channel_syncpt {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;
	struct nvgpu_nvhost_dev *nvhost_dev;
	u32 id;
	struct nvgpu_mem syncpt_buf;
};

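/*
 * Emit a syncpoint wait into the channel's private command buffer. Invalid
 * wait IDs are elided with a warning and already-expired thresholds are
 * skipped, so only genuinely outstanding syncpoint waits cost GPU time.
 */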
static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
		u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	int err = 0;

	if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(sp->nvhost_dev, id)) {
		nvgpu_warn(c->g, "invalid wait id in gpfifo submit, elided");
		return 0;
	}

	if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev, id, thresh))
		return 0;

	err = gk20a_channel_alloc_priv_cmdbuf(c,
		c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
	if (err) {
		nvgpu_err(c->g,
			"not enough priv cmd buffer space");
		return err;
	}

	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
			id, sp->c->vm->syncpt_ro_map_gpu_va);
	c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id,
			thresh, c->vm->syncpt_ro_map_gpu_va);

	return 0;
}

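/*
 * Emit one wait command per sync point backing the sync_fence referenced by
 * fd. Slots for already-expired points are cleared to no-ops in the private
 * command buffer; the remaining points wait through the read-only syncpoint
 * map in the channel's address space. Returns -ENODEV when the kernel is
 * built without CONFIG_SYNC.
 */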
static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry *wait_cmd,
		struct gk20a_fence *fence)
{
#ifdef CONFIG_SYNC
	int i;
	int num_wait_cmds;
	struct sync_fence *sync_fence;
	struct sync_pt *pt;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	u32 wait_id;
	int err = 0;
	u32 wait_cmd_size = 0;

	sync_fence = nvgpu_nvhost_sync_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	/* validate syncpt ids */
	for (i = 0; i < sync_fence->num_fences; i++) {
		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
		wait_id = nvgpu_nvhost_sync_pt_id(pt);
		if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
					sp->nvhost_dev, wait_id)) {
			sync_fence_put(sync_fence);
			return -EINVAL;
		}
	}

	num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
	if (num_wait_cmds == 0) {
		sync_fence_put(sync_fence);
		return 0;
	}
	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
	err = gk20a_channel_alloc_priv_cmdbuf(c,
		wait_cmd_size * num_wait_cmds,
		wait_cmd);
	if (err) {
		nvgpu_err(c->g,
			"not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return err;
	}

	i = 0;
	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
		u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
		u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);

		if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev,
				wait_id, wait_value)) {
			nvgpu_memset(c->g, wait_cmd->mem,
				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
				0, wait_cmd_size * sizeof(u32));
		} else {
			nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
					wait_id, sp->syncpt_buf.gpu_va);
			c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
				i * wait_cmd_size, wait_id, wait_value,
				c->vm->syncpt_ro_map_gpu_va);
		}
	}

	WARN_ON(i != num_wait_cmds);
	sync_fence_put(sync_fence);

	return 0;
#else
	return -ENODEV;
#endif
}

static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
{
	struct channel_gk20a *ch = priv;

	gk20a_channel_update(ch);

	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
	gk20a_channel_put(ch);
}

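/*
 * Common path for all syncpoint increments: allocates the increment command
 * (optionally preceded by a wait-for-idle), advances the syncpoint max by 2
 * to cover the increments the command will perform, optionally registers an
 * interrupt notifier so gk20a_channel_syncpt_update() runs at completion,
 * and wraps the resulting threshold in a gk20a_fence.
 */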
static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
		bool wfi_cmd,
		bool register_irq,
		struct priv_cmd_entry *incr_cmd,
		struct gk20a_fence *fence,
		bool need_sync_fence)
{
	u32 thresh;
	int err;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;

	err = gk20a_channel_alloc_priv_cmdbuf(c,
			c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd),
			incr_cmd);
	if (err)
		return err;

	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
			sp->id, sp->syncpt_buf.gpu_va);
	c->g->ops.fifo.add_syncpt_incr_cmd(c->g, wfi_cmd,
			incr_cmd, sp->id, sp->syncpt_buf.gpu_va);

	thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost_dev, sp->id, 2);

	if (register_irq) {
		struct channel_gk20a *referenced = gk20a_channel_get(c);

		WARN_ON(!referenced);

		if (referenced) {
			/* note: channel_put() is in
			 * gk20a_channel_syncpt_update() */

			err = nvgpu_nvhost_intr_register_notifier(
				sp->nvhost_dev,
				sp->id, thresh,
				gk20a_channel_syncpt_update, c);
			if (err)
				gk20a_channel_put(referenced);

			/* Adding interrupt action should
			 * never fail. A proper error handling
			 * here would require us to decrement
			 * the syncpt max back to its original
			 * value. */
			WARN(err,
			     "failed to set submit complete interrupt");
		}
	}

	err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev, sp->id, thresh,
				      wfi_cmd, need_sync_fence);
	if (err)
		goto clean_up_priv_cmd;

	return 0;

clean_up_priv_cmd:
	gk20a_free_priv_cmdbuf(c, incr_cmd);
	return err;
}

static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	return __gk20a_channel_syncpt_incr(s,
			true /* wfi */,
			false /* no irq handler */,
			entry, fence, true);
}

static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool need_sync_fence,
		bool register_irq)
{
	/* Don't put wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_syncpt_incr(s,
			false /* no wfi */,
			register_irq /* register irq */,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool wfi,
		bool need_sync_fence,
		bool register_irq)
{
	/* Need to do 'wfi + host incr' since we return the fence
	 * to user space. */
	return __gk20a_channel_syncpt_incr(s,
			wfi,
			register_irq /* register irq */,
			entry, fence, need_sync_fence);
}

static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);
}

static void gk20a_channel_syncpt_signal_timeline(
		struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	return sp->id;
}

static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);

	sp->c->g->ops.fifo.free_syncpt_buf(sp->c, &sp->syncpt_buf);

	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);
	nvgpu_nvhost_syncpt_put_ref_ext(sp->nvhost_dev, sp->id);
	nvgpu_kfree(sp->c->g, sp);
}

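/*
 * Allocate a host-managed syncpoint for the channel, set up the syncpoint
 * buffer, reset the syncpoint min to max, and expose the syncpoint-backed
 * implementation through the gk20a_channel_sync ops table.
 */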
static struct gk20a_channel_sync *
gk20a_channel_syncpt_create(struct channel_gk20a *c)
{
	struct gk20a_channel_syncpt *sp;
	char syncpt_name[32];

	sp = nvgpu_kzalloc(c->g, sizeof(*sp));
	if (!sp)
		return NULL;

	sp->c = c;
	sp->nvhost_dev = c->g->nvhost_dev;

	snprintf(syncpt_name, sizeof(syncpt_name),
		"%s_%d", c->g->name, c->chid);

	sp->id = nvgpu_nvhost_get_syncpt_host_managed(sp->nvhost_dev,
						c->chid, syncpt_name);
	if (!sp->id) {
		nvgpu_kfree(c->g, sp);
		nvgpu_err(c->g, "failed to get free syncpt");
		return NULL;
	}

	sp->c->g->ops.fifo.alloc_syncpt_buf(sp->c, sp->id,
				&sp->syncpt_buf);

	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);

	nvgpu_atomic_set(&sp->ops.refcount, 0);
	sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
	sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
	sp->ops.incr = gk20a_channel_syncpt_incr;
	sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi;
	sp->ops.incr_user = gk20a_channel_syncpt_incr_user;
	sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
	sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline;
	sp->ops.syncpt_id = gk20a_channel_syncpt_id;
	sp->ops.destroy = gk20a_channel_syncpt_destroy;

	return &sp->ops;
}
#endif /* CONFIG_TEGRA_GK20A_NVHOST */

struct gk20a_channel_semaphore {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;

	/* A semaphore pool owned by this channel. */
	struct nvgpu_semaphore_pool *pool;

	/* A sync timeline that advances when gpu completes work. */
	struct sync_timeline *timeline;
};

#ifdef CONFIG_SYNC
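/*
 * Bookkeeping for one asynchronous wait on a foreign sync_fence: the waiter
 * callback releases 'sema' (which the GPU is blocked on) once 'fence'
 * signals, and 'entry' links the work into the per-GPU pending list so it
 * can be canceled at driver shutdown.
 */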
struct wait_fence_work {
	struct sync_fence_waiter waiter;
	struct sync_fence *fence;
	struct channel_gk20a *ch;
	struct nvgpu_semaphore *sema;
	struct gk20a *g;
	struct nvgpu_list_node entry;
};

static inline struct wait_fence_work *
wait_fence_work_from_entry(struct nvgpu_list_node *node)
{
	return (struct wait_fence_work *)
		((uintptr_t)node - offsetof(struct wait_fence_work, entry));
};

/*
 * Keep track of all the pending waits on semaphores that exist for a GPU. This
 * has to be done because the waits on fences backed by semaphores are
 * asynchronous so it's impossible to otherwise know when they will fire. During
 * driver cleanup this list can be checked and all existing waits can be
 * canceled.
 */
static void gk20a_add_pending_sema_wait(struct gk20a *g,
		struct wait_fence_work *work)
{
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	nvgpu_list_add(&work->entry, &g->pending_sema_waits);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
}

/*
 * Copy the list head from the pending wait list to the passed list and
 * then delete the entire pending list.
 */
static void gk20a_start_sema_wait_cancel(struct gk20a *g,
		struct nvgpu_list_node *list)
{
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	nvgpu_list_replace_init(&g->pending_sema_waits, list);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
}

/*
 * During shutdown this should be called to make sure that any pending sema
 * waits are canceled. This is a fairly delicate and tricky bit of code. Here's
 * how it works.
 *
 * Every time a semaphore wait is initiated in SW the wait_fence_work struct is
 * added to the pending_sema_waits list. When the semaphore launcher code runs
 * it checks the pending_sema_waits list. If this list is non-empty that means
 * that the wait_fence_work struct must be present and can be removed.
 *
 * When the driver shuts down one of the steps is to cancel pending sema waits.
 * To do this the entire list of pending sema waits is removed (and stored in a
 * separate local list). So now, if the semaphore launcher code runs it will see
 * that the pending_sema_waits list is empty and knows that it no longer owns
 * the wait_fence_work struct.
 */
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g)
{
	struct wait_fence_work *work;
	struct nvgpu_list_node local_pending_sema_waits;

	gk20a_start_sema_wait_cancel(g, &local_pending_sema_waits);

	while (!nvgpu_list_empty(&local_pending_sema_waits)) {
		int ret;

		work = nvgpu_list_first_entry(&local_pending_sema_waits,
					      wait_fence_work,
					      entry);

		nvgpu_list_del(&work->entry);

		/*
		 * Only nvgpu_kfree() work if the cancel is successful.
		 * Otherwise it's in use by the
		 * gk20a_channel_semaphore_launcher() code.
		 */
		ret = sync_fence_cancel_async(work->fence, &work->waiter);
		if (ret == 0)
			nvgpu_kfree(g, work);
	}
}

static void gk20a_channel_semaphore_launcher(
		struct sync_fence *fence,
		struct sync_fence_waiter *waiter)
{
	int err;
	struct wait_fence_work *w =
		container_of(waiter, struct wait_fence_work, waiter);
	struct gk20a *g = w->g;

	/*
	 * This spinlock must protect a _very_ small critical section -
	 * otherwise it's possible that the deterministic submit path suffers.
	 */
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	if (!nvgpu_list_empty(&g->pending_sema_waits))
		nvgpu_list_del(&w->entry);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);

	gk20a_dbg_info("waiting for pre fence %p '%s'",
			fence, fence->name);
	err = sync_fence_wait(fence, -1);
	if (err < 0)
		nvgpu_err(g, "error waiting pre-fence: %d", err);

	gk20a_dbg_info(
		"wait completed (%d) for fence %p '%s', triggering gpu work",
		err, fence, fence->name);
	sync_fence_put(fence);
	nvgpu_semaphore_release(w->sema);
	nvgpu_semaphore_put(w->sema);
	nvgpu_kfree(g, w);
}
#endif

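/*
 * Write the host semaphore method sequence into the priv cmd buffer:
 * semaphore_a/semaphore_b carry the 40-bit GPU VA of the semaphore,
 * semaphore_c the payload, and semaphore_d the operation -- acquire-greater-
 * equal (with switch enable) for waits, release plus a non-stall interrupt
 * for increments. A release also bumps the sema's next_value before the
 * payload is written, so the command targets the new value.
 */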
static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
		struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
		int cmd_size, bool acquire, bool wfi)
{
	int ch = c->chid;
	u32 ob, off = cmd->off;
	u64 va;

	ob = off;

	/*
	 * RO for acquire (since we just need to read the mem) and RW for
	 * release since we will need to write back to the semaphore memory.
	 */
	va = acquire ? nvgpu_semaphore_gpu_ro_va(s) :
		       nvgpu_semaphore_gpu_rw_va(s);

	/*
	 * If the op is not an acquire (and therefore a release) we should
	 * incr the underlying sema next_value.
	 */
	if (!acquire)
		nvgpu_semaphore_incr(s);

	/* semaphore_a */
	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
	/* offset_upper */
	nvgpu_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff);
	/* semaphore_b */
	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
	/* offset */
	nvgpu_mem_wr32(g, cmd->mem, off++, va & 0xffffffff);

	if (acquire) {
		/* semaphore_c */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       nvgpu_semaphore_get_value(s));
		/* semaphore_d */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: acq_geq, switch_en */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
	} else {
		/* semaphore_c */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       nvgpu_semaphore_get_value(s));
		/* semaphore_d */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: release, wfi */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       0x2 | ((wfi ? 0x0 : 0x1) << 20));
		/* non_stall_int */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
		/* ignored */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0);
	}

	if (acquire)
		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d "
				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
				     ch, nvgpu_semaphore_get_value(s),
				     s->hw_sema->ch->chid, va, cmd->gva,
				     cmd->mem->gpu_va, ob);
	else
		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx "
				     "cmd_mem=0x%llx b=0x%llx off=%u",
				     ch, nvgpu_semaphore_get_value(s),
				     nvgpu_semaphore_read(s), va, cmd->gva,
				     cmd->mem->gpu_va, ob);
}

static int gk20a_channel_semaphore_wait_syncpt(
		struct gk20a_channel_sync *s, u32 id,
		u32 thresh, struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct gk20a *g = sema->c->g;
	nvgpu_err(g, "trying to use syncpoint synchronization");
	return -ENODEV;
}

#ifdef CONFIG_SYNC
/*
 * Attempt a fast path for waiting on a sync_fence. Basically if the passed
 * sync_fence is backed by an nvgpu_semaphore then there's no reason to go
 * through the rigmarole of setting up a separate semaphore which waits on an
 * interrupt from the GPU and then triggers a worker thread to execute a SW
 * based semaphore release. Instead just have the GPU wait on the same semaphore
 * that is going to be incremented by the GPU.
 *
 * This function returns -ENODEV when the fast path cannot be taken because the
 * fence is not backed by a GPU semaphore, 0 on success, or a negative error
 * code if allocating the priv cmd buffer space fails.
 */
static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
		struct sync_fence *fence,
		struct priv_cmd_entry *wait_cmd,
		struct nvgpu_semaphore **fp_sema)
{
	struct nvgpu_semaphore *sema;
	int err;

	if (!gk20a_is_sema_backed_sync_fence(fence))
		return -ENODEV;

	sema = gk20a_sync_fence_get_sema(fence);

	/*
	 * If there's no underlying sema then the fence has already signaled.
	 */
	if (!sema) {
		*fp_sema = NULL;
		return 0;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
	if (err)
		return err;

	nvgpu_semaphore_get(sema);
	BUG_ON(!nvgpu_atomic_read(&sema->value));
	add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);

	/*
	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
	 * fence with the underlying semaphore.
	 */
	*fp_sema = sema;

	return 0;
}
#endif

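/*
 * Pre-fence wait on a sync_fence fd. Tries the semaphore-backed fast path
 * first; otherwise it allocates a fresh semaphore that blocks the GPU,
 * registers an async waiter on the fence, and lets
 * gk20a_channel_semaphore_launcher() release the semaphore once the fence
 * signals. A fence that has already signaled is handled inline.
 */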
static int gk20a_channel_semaphore_wait_fd(
		struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sema->c;
#ifdef CONFIG_SYNC
	struct nvgpu_semaphore *fp_sema;
	struct sync_fence *sync_fence;
	struct priv_cmd_entry *wait_cmd = entry;
	struct wait_fence_work *w = NULL;
	int err, ret, status;

	sync_fence = gk20a_sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
	if (ret == 0) {
		if (fp_sema) {
			err = gk20a_fence_from_semaphore(c->g, fence,
					sema->timeline,
					fp_sema,
					&c->semaphore_wq,
					false, false);
			if (err) {
				nvgpu_semaphore_put(fp_sema);
				goto clean_up_priv_cmd;
			}
		} else
			/*
			 * Init an empty fence. It will instantly return
			 * from gk20a_fence_wait().
			 */
			gk20a_init_fence(fence, NULL, NULL, false);

		sync_fence_put(sync_fence);
		goto skip_slow_path;
	}

	/* If the fence has signaled there is no reason to wait on it. */
	status = atomic_read(&sync_fence->status);
	if (status == 0) {
		sync_fence_put(sync_fence);
		goto skip_slow_path;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
	if (err) {
		nvgpu_err(c->g,
			"not enough priv cmd buffer space");
		goto clean_up_sync_fence;
	}

	w = nvgpu_kzalloc(c->g, sizeof(*w));
	if (!w) {
		err = -ENOMEM;
		goto clean_up_priv_cmd;
	}

	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
	w->fence = sync_fence;
	w->g = c->g;
	w->ch = c;
	w->sema = nvgpu_semaphore_alloc(c);
	if (!w->sema) {
		nvgpu_err(c->g, "ran out of semaphores");
		err = -ENOMEM;
		goto clean_up_worker;
	}

	/* worker takes one reference */
	nvgpu_semaphore_get(w->sema);
	nvgpu_semaphore_incr(w->sema);

	/* GPU unblocked when the semaphore value increments. */
	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);

	/*
	 * We need to create the fence before adding the waiter to ensure
	 * that we properly clean up in the event the sync_fence has
	 * already signaled.
	 */
	err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
			&c->semaphore_wq, false, false);
	if (err)
		goto clean_up_sema;

	ret = sync_fence_wait_async(sync_fence, &w->waiter);
	gk20a_add_pending_sema_wait(c->g, w);

	/*
	 * If the sync_fence has already signaled then the above async_wait
	 * will never trigger. This causes the semaphore release op to never
	 * happen which, in turn, hangs the GPU. That's bad. So let's just
	 * do the nvgpu_semaphore_release() right now.
	 */
	if (ret == 1) {
		sync_fence_put(sync_fence);
		nvgpu_semaphore_release(w->sema);
		nvgpu_semaphore_put(w->sema);
	}

skip_slow_path:
	return 0;

clean_up_sema:
	/*
	 * Release the refs to the semaphore, including
	 * the one for the worker since it will never run.
	 */
	nvgpu_semaphore_put(w->sema);
	nvgpu_semaphore_put(w->sema);
clean_up_worker:
	nvgpu_kfree(c->g, w);
clean_up_priv_cmd:
	gk20a_free_priv_cmdbuf(c, entry);
clean_up_sync_fence:
	sync_fence_put(sync_fence);
	return err;
#else
	nvgpu_err(c->g,
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

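/*
 * Common post-fence path: allocate a fresh semaphore, emit the release
 * command (optionally preceded by a wait-for-idle) into the channel's priv
 * cmd buffer, and wrap the semaphore in a gk20a_fence for the caller.
 */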
static int __gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s, bool wfi_cmd,
		struct priv_cmd_entry *incr_cmd,
		struct gk20a_fence *fence,
		bool need_sync_fence)
{
	int incr_cmd_size;
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sp->c;
	struct nvgpu_semaphore *semaphore;
	int err = 0;

	semaphore = nvgpu_semaphore_alloc(c);
	if (!semaphore) {
		nvgpu_err(c->g,
			"ran out of semaphores");
		return -ENOMEM;
	}

	incr_cmd_size = 10;
	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
	if (err) {
		nvgpu_err(c->g,
			"not enough priv cmd buffer space");
		goto clean_up_sema;
	}

	/* Release the completion semaphore. */
	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);

	err = gk20a_fence_from_semaphore(c->g, fence,
			sp->timeline, semaphore,
			&c->semaphore_wq,
			wfi_cmd,
			need_sync_fence);
	if (err)
		goto clean_up_sema;

	return 0;

clean_up_sema:
	nvgpu_semaphore_put(semaphore);
	return err;
}

static int gk20a_channel_semaphore_incr_wfi(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	return __gk20a_channel_semaphore_incr(s,
			true /* wfi */,
			entry, fence, true);
}

static int gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool need_sync_fence,
		bool register_irq)
{
	/* Don't put wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_semaphore_incr(s,
			false /* no wfi */,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_semaphore_incr_user(
		struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool wfi,
		bool need_sync_fence,
		bool register_irq)
{
#ifdef CONFIG_SYNC
	int err;

	err = __gk20a_channel_semaphore_incr(s, wfi, entry, fence,
					     need_sync_fence);
	if (err)
		return err;

	return 0;
#else
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	nvgpu_err(sema->c->g,
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static void gk20a_channel_semaphore_signal_timeline(
		struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	gk20a_sync_timeline_signal(sp->timeline);
}

static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s)
{
	return -EINVAL;
}

static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);

	if (sema->timeline)
		gk20a_sync_timeline_destroy(sema->timeline);

	/* The sema pool is cleaned up by the VM destroy. */
	sema->pool = NULL;

	nvgpu_kfree(sema->c->g, sema);
}

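/*
 * Build the semaphore-backed implementation for channels on platforms
 * without syncpoints: the channel shares its VM's semaphore pool and, when
 * CONFIG_SYNC is enabled, gets its own sync timeline for userspace fences.
 */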
static struct gk20a_channel_sync *
gk20a_channel_semaphore_create(struct channel_gk20a *c)
{
	int asid = -1;
	struct gk20a_channel_semaphore *sema;
	char pool_name[20];

	if (WARN_ON(!c->vm))
		return NULL;

	sema = nvgpu_kzalloc(c->g, sizeof(*sema));
	if (!sema)
		return NULL;
	sema->c = c;

	if (c->vm->as_share)
		asid = c->vm->as_share->id;

	sprintf(pool_name, "semaphore_pool-%d", c->chid);
	sema->pool = c->vm->sema_pool;

#ifdef CONFIG_SYNC
	sema->timeline = gk20a_sync_timeline_create(
			"gk20a_ch%d_as%d", c->chid, asid);
	if (!sema->timeline) {
		gk20a_channel_semaphore_destroy(&sema->ops);
		return NULL;
	}
#endif
	nvgpu_atomic_set(&sema->ops.refcount, 0);
	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
	sema->ops.incr = gk20a_channel_semaphore_incr;
	sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
	sema->ops.incr_user = gk20a_channel_semaphore_incr_user;
	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
	sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
	sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id;
	sema->ops.destroy = gk20a_channel_semaphore_destroy;

	return &sema->ops;
}

void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
{
	sync->destroy(sync);
}

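/*
 * Pick the syncpoint-backed implementation when the platform has syncpoints,
 * otherwise fall back to GPU semaphores.
 *
 * Illustrative use from a submit path (the real call sites live in the
 * channel submit code and may differ):
 *
 *	if (!c->sync)
 *		c->sync = gk20a_channel_sync_create(c);
 *	err = c->sync->wait_fd(c->sync, fd, &wait_cmd, &pre_fence);
 *	...
 *	err = c->sync->incr_user(c->sync, fd, &incr_cmd, &post_fence,
 *				 need_wfi, need_sync_fence, register_irq);
 */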
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	if (gk20a_platform_has_syncpoints(c->g))
		return gk20a_channel_syncpt_create(c);
#endif
	return gk20a_channel_semaphore_create(c);
}

bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
{
	return !gk20a_platform_has_syncpoints(g);
}