author	Lauri Peltonen <lpeltonen@nvidia.com>	2014-02-25 06:31:47 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:10:08 -0400
commit	e204224b26e6b5f609bc4e542368c1a13aeece61 (patch)
tree	9d351eb734a54ff677a2f26fec2d6f96adc1e220 /drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
parent	4d278fdfd70082da3c020a15ba0dd722f9da1b3b (diff)
gpu: nvgpu: Add semaphore based gk20a_channel_sync
Add a semaphore implementation of the gk20a_channel_sync interface.

Each channel has one semaphore pool, which is mapped read-write to the
channel vm. We allocate one or two semaphores from the pool for each
submit.

The first semaphore is needed only if we must wait for an opaque sync
fd. In that case, we allocate the semaphore and ask the GPU to wait
for its value to become 1 (semaphore acquire method). We also queue
kernel work that waits on the fence fd and then releases the semaphore
(sets its value to 1) so that the command buffer can proceed.

The second semaphore is used on every submit and is used for work
completion tracking. The GPU sets its value to 1 when the command
buffer has been processed.

Channel jobs need to hold references to both semaphores so that their
backing semaphore pool slots are not reused while the job is in
flight. Therefore gk20a_channel_fence keeps a reference to the
semaphore that it represents (channel fences are stored in the job
structure). This means that we must diligently close and dup
gk20a_channel_fence objects to avoid leaking semaphores.

Bug 1450122
Bug 1445450

Change-Id: Ib61091a1b7632fa36efe0289011040ef7c4ae8f8
Signed-off-by: Lauri Peltonen <lpeltonen@nvidia.com>
Reviewed-on: http://git-master/r/374844
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
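[Editor's sketch, not part of the commit] For orientation, the fragment below condenses the submit-time flow described above into one function. It reuses helpers this patch adds (gk20a_semaphore_alloc, gk20a_semaphore_gpu_va, gk20a_channel_alloc_priv_cmdbuf, add_sema_cmd); example_submit itself, and the omitted error handling, are hypothetical.

/* Illustrative only; see the diff below for the real implementations. */
static int example_submit(struct gk20a_channel_semaphore *sp, int wait_fd)
{
	struct channel_gk20a *c = sp->c;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_semaphore *pre, *post;

	if (wait_fd >= 0) {
		/* Semaphore 1 (only for opaque sync fds): the GPU
		 * acquires it (waits for its value to become 1); queued
		 * kernel work releases it when the fd signals. */
		pre = gk20a_semaphore_alloc(sp->pool);
		gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
		add_sema_cmd(wait_cmd->ptr,
			     gk20a_semaphore_gpu_va(pre, c->vm),
			     1, true /* acquire */, false);
	}

	/* Semaphore 2 (every submit): the GPU releases it to 1 when the
	 * command buffer has been processed; the job's channel fence
	 * keeps a reference so the pool slot is not reused in flight. */
	post = gk20a_semaphore_alloc(sp->pool);
	gk20a_channel_alloc_priv_cmdbuf(c, 10, &incr_cmd);
	add_sema_cmd(incr_cmd->ptr,
		     gk20a_semaphore_gpu_va(post, c->vm),
		     1, false /* release */, false);
	return 0;
}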
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c	424
1 file changed, 419 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index f91dd52d..677c4b49 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -19,6 +19,9 @@
 
 #include "channel_sync_gk20a.h"
 #include "gk20a.h"
+#include "semaphore_gk20a.h"
+#include "sync_gk20a.h"
+#include "mm_gk20a.h"
 
 #ifdef CONFIG_SYNC
 #include "../../../staging/android/sync.h"
@@ -74,7 +77,8 @@ bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s,
 }
 
 int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
-		u32 thresh, struct priv_cmd_entry **entry)
+		u32 thresh, struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
 {
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
@@ -99,11 +103,13 @@ int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
 	add_wait_cmd(&wait_cmd->ptr[0], id, thresh);
 
 	*entry = wait_cmd;
+	fence->valid = false;
 	return 0;
 }
 
 int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
-		struct priv_cmd_entry **entry)
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
 {
 #ifdef CONFIG_SYNC
 	int i;
@@ -158,6 +164,7 @@ int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 	sync_fence_put(sync_fence);
 
 	*entry = wait_cmd;
+	fence->valid = false;
 	return 0;
 #else
 	return -ENODEV;
@@ -301,6 +308,7 @@ int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
 }
 
 int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
+		int wait_fence_fd,
 		struct priv_cmd_entry **entry,
 		struct gk20a_channel_fence *fence,
 		bool wfi,
@@ -366,18 +374,424 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
 	sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
 	sp->ops.destroy = gk20a_channel_syncpt_destroy;
 
-	sp->ops.syncpt_aggressive_destroy = true;
+	sp->ops.aggressive_destroy = true;
 
 	return &sp->ops;
 }
 #endif /* CONFIG_TEGRA_GK20A */
 
+struct gk20a_channel_semaphore {
+	struct gk20a_channel_sync ops;
+	struct channel_gk20a *c;
+
+	/* A semaphore pool owned by this channel. */
+	struct gk20a_semaphore_pool *pool;
+
+	/* A sync timeline that advances when gpu completes work. */
+	struct sync_timeline *timeline;
+};
+
+#ifdef CONFIG_SYNC
+struct wait_fence_work {
+	struct sync_fence_waiter waiter;
+	struct channel_gk20a *ch;
+	struct gk20a_semaphore *sema;
+};
+
+static void gk20a_channel_semaphore_launcher(
+		struct sync_fence *fence,
+		struct sync_fence_waiter *waiter)
+{
+	int err;
+	struct wait_fence_work *w =
+		container_of(waiter, struct wait_fence_work, waiter);
+	struct gk20a *g = w->ch->g;
+
+	gk20a_dbg_info("waiting for pre fence %p '%s'",
+			fence, fence->name);
+	err = sync_fence_wait(fence, -1);
+	if (err < 0)
+		dev_err(&g->dev->dev, "error waiting pre-fence: %d\n", err);
+
+	gk20a_dbg_info(
+		"wait completed (%d) for fence %p '%s', triggering gpu work",
+		err, fence, fence->name);
+	sync_fence_put(fence);
+	gk20a_semaphore_release(w->sema);
+	gk20a_semaphore_put(w->sema);
+	kfree(w);
+}
+#endif
+
+static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload,
+			bool acquire, bool wfi)
+{
+	int i = 0;
+	/* semaphore_a */
+	ptr[i++] = 0x20010004;
+	/* offset_upper */
+	ptr[i++] = (sema >> 32) & 0xff;
+	/* semaphore_b */
+	ptr[i++] = 0x20010005;
+	/* offset */
+	ptr[i++] = sema & 0xffffffff;
+	/* semaphore_c */
+	ptr[i++] = 0x20010006;
+	/* payload */
+	ptr[i++] = payload;
+	if (acquire) {
+		/* semaphore_d */
+		ptr[i++] = 0x20010007;
+		/* operation: acq_geq, switch_en */
+		ptr[i++] = 0x4 | (0x1 << 12);
+	} else {
+		/* semaphore_d */
+		ptr[i++] = 0x20010007;
+		/* operation: release, wfi */
+		ptr[i++] = 0x2 | ((wfi ? 0x0 : 0x1) << 20);
+		/* non_stall_int */
+		ptr[i++] = 0x20010008;
+		/* ignored */
+		ptr[i++] = 0;
+	}
+	return i;
+}
+
+static int gk20a_channel_semaphore_wait_cpu(
+		struct gk20a_channel_sync *s,
+		struct gk20a_channel_fence *fence,
+		int timeout)
+{
+	int remain;
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	if (!fence->valid || WARN_ON(!fence->semaphore))
+		return 0;
+
+	remain = wait_event_interruptible_timeout(
+			sp->c->semaphore_wq,
+			!gk20a_semaphore_is_acquired(fence->semaphore),
+			timeout);
+	if (remain == 0 && gk20a_semaphore_is_acquired(fence->semaphore))
+		return -ETIMEDOUT;
+	else if (remain < 0)
+		return remain;
+	return 0;
+}
+
+static bool gk20a_channel_semaphore_is_expired(
+		struct gk20a_channel_sync *s,
+		struct gk20a_channel_fence *fence)
+{
+	bool expired;
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	if (!fence->valid || WARN_ON(!fence->semaphore))
+		return true;
+
+	expired = !gk20a_semaphore_is_acquired(fence->semaphore);
+	if (expired)
+		gk20a_sync_timeline_signal(sp->timeline);
+	return expired;
+}
+
+static int gk20a_channel_semaphore_wait_syncpt(
+		struct gk20a_channel_sync *s, u32 id,
+		u32 thresh, struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct device *dev = dev_from_gk20a(sema->c->g);
+	gk20a_err(dev, "trying to use syncpoint synchronization");
+	return -ENODEV;
+}
+
+static int gk20a_channel_semaphore_wait_fd(
+		struct gk20a_channel_sync *s, int fd,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sema->c;
+#ifdef CONFIG_SYNC
+	struct sync_fence *sync_fence;
+	struct priv_cmd_entry *wait_cmd = NULL;
+	struct wait_fence_work *w;
+	int written;
+	int err;
+	u64 va;
+
+	sync_fence = gk20a_sync_fence_fdget(fd);
+	if (!sync_fence)
+		return -EINVAL;
+
+	w = kzalloc(sizeof(*w), GFP_KERNEL);
+	if (!w) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
+	w->ch = c;
+	w->sema = gk20a_semaphore_alloc(sema->pool);
+	if (!w->sema) {
+		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
+		err = -EAGAIN;
+		goto fail;
+	}
+
+	gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
+	if (wait_cmd == NULL) {
+		gk20a_err(dev_from_gk20a(c->g),
+			"not enough priv cmd buffer space");
+		err = -EAGAIN;
+		goto fail;
+	}
+
+	va = gk20a_semaphore_gpu_va(w->sema, c->vm);
+	/* GPU is unblocked when the semaphore value becomes 1. */
+	written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false);
+	WARN_ON(written != wait_cmd->size);
+	sync_fence_wait_async(sync_fence, &w->waiter);
+
+	*entry = wait_cmd;
+	return 0;
+fail:
+	if (w && w->sema)
+		gk20a_semaphore_put(w->sema);
+	kfree(w);
+	sync_fence_put(sync_fence);
+	return err;
+#else
+	gk20a_err(dev_from_gk20a(c->g),
+		"trying to use sync fds with CONFIG_SYNC disabled");
+	return -ENODEV;
+#endif
+}
+
+static int __gk20a_channel_semaphore_incr(
+		struct gk20a_channel_sync *s, bool wfi_cmd,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	u64 va;
+	int incr_cmd_size;
+	int written;
+	struct priv_cmd_entry *incr_cmd = NULL;
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sp->c;
+	struct gk20a_semaphore *semaphore;
+
+	semaphore = gk20a_semaphore_alloc(sp->pool);
+	if (!semaphore) {
+		gk20a_err(dev_from_gk20a(c->g),
+			"ran out of semaphores");
+		return -EAGAIN;
+	}
+
+	incr_cmd_size = 10;
+	gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
+	if (incr_cmd == NULL) {
+		gk20a_err(dev_from_gk20a(c->g),
+			"not enough priv cmd buffer space");
+		gk20a_semaphore_put(semaphore);
+		return -EAGAIN;
+	}
+
+	/* Release the completion semaphore. */
+	va = gk20a_semaphore_gpu_va(semaphore, c->vm);
+	written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd);
+	WARN_ON(written != incr_cmd_size);
+
+	fence->valid = true;
+	fence->wfi = wfi_cmd;
+	fence->semaphore = semaphore;
+	*entry = incr_cmd;
+	return 0;
+}
+
+static int gk20a_channel_semaphore_incr_wfi(
+		struct gk20a_channel_sync *s,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	return __gk20a_channel_semaphore_incr(s,
+			true /* wfi */,
+			entry, fence);
+}
+
+static int gk20a_channel_semaphore_incr(
+		struct gk20a_channel_sync *s,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	/* Don't put wfi cmd to this one since we're not returning
+	 * a fence to user space. */
+	return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
+			entry, fence);
+}
+
+static int gk20a_channel_semaphore_incr_user_syncpt(
+		struct gk20a_channel_sync *s,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence,
+		bool wfi,
+		u32 *id, u32 *thresh)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct device *dev = dev_from_gk20a(sema->c->g);
+	gk20a_err(dev, "trying to use syncpoint synchronization");
+	return -ENODEV;
+}
+
+static int gk20a_channel_semaphore_incr_user_fd(
+		struct gk20a_channel_sync *s,
+		int wait_fence_fd,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence,
+		bool wfi,
+		int *fd)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+#ifdef CONFIG_SYNC
+	struct sync_fence *dependency = NULL;
+	int err;
+
+	err = __gk20a_channel_semaphore_incr(s, wfi,
+			entry, fence);
+	if (err)
+		return err;
+
+	if (wait_fence_fd >= 0) {
+		dependency = gk20a_sync_fence_fdget(wait_fence_fd);
+		if (!dependency)
+			return -EINVAL;
+	}
+
+	*fd = gk20a_sync_fence_create(sema->timeline, fence->semaphore,
+			dependency, "fence");
+	if (*fd < 0) {
+		if (dependency)
+			sync_fence_put(dependency);
+		return *fd;
+	}
+	return 0;
+#else
+	gk20a_err(dev_from_gk20a(sema->c->g),
+		"trying to use sync fds with CONFIG_SYNC disabled");
+	return -ENODEV;
+#endif
+}
+
+static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
+{
+	/* Nothing to do. */
+}
+
+static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	if (sema->timeline)
+		gk20a_sync_timeline_destroy(sema->timeline);
+	if (sema->pool) {
+		gk20a_semaphore_pool_unmap(sema->pool, sema->c->vm);
+		gk20a_semaphore_pool_put(sema->pool);
+	}
+	kfree(sema);
+}
+
+static struct gk20a_channel_sync *
+gk20a_channel_semaphore_create(struct channel_gk20a *c)
+{
+	int err;
+	int asid = -1;
+	struct gk20a_channel_semaphore *sema;
+	char pool_name[20];
+
+	if (WARN_ON(!c->vm))
+		return NULL;
+
+	sema = kzalloc(sizeof(*sema), GFP_KERNEL);
+	if (!sema)
+		return NULL;
+	sema->c = c;
+
+	if (c->vm->as_share)
+		asid = c->vm->as_share->id;
+
+	/* A pool of 256 semaphores fits into one 4k page. */
+	sprintf(pool_name, "semaphore_pool-%d", c->hw_chid);
+	sema->pool = gk20a_semaphore_pool_alloc(dev_from_gk20a(c->g),
+			pool_name, 256);
+	if (!sema->pool)
+		goto clean_up;
+
+	/* Map the semaphore pool to the channel vm. Map as read-write to the
+	 * owner channel (all other channels should map as read only!). */
+	err = gk20a_semaphore_pool_map(sema->pool, c->vm, gk20a_mem_flag_none);
+	if (err)
+		goto clean_up;
+
+#ifdef CONFIG_SYNC
+	sema->timeline = gk20a_sync_timeline_create(
+			"gk20a_ch%d_as%d", c->hw_chid, asid);
+	if (!sema->timeline)
+		goto clean_up;
+#endif
+	sema->ops.wait_cpu = gk20a_channel_semaphore_wait_cpu;
+	sema->ops.is_expired = gk20a_channel_semaphore_is_expired;
+	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
+	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
+	sema->ops.incr = gk20a_channel_semaphore_incr;
+	sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
+	sema->ops.incr_user_syncpt = gk20a_channel_semaphore_incr_user_syncpt;
+	sema->ops.incr_user_fd = gk20a_channel_semaphore_incr_user_fd;
+	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
+	sema->ops.destroy = gk20a_channel_semaphore_destroy;
+
+	/* Aggressively destroying the semaphore sync would cause overhead
+	 * since the pool needs to be mapped to GMMU. */
+	sema->ops.aggressive_destroy = false;
+
+	return &sema->ops;
+clean_up:
+	gk20a_channel_semaphore_destroy(&sema->ops);
+	return NULL;
+}
+
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
 {
 #ifdef CONFIG_TEGRA_GK20A
 	if (gk20a_platform_has_syncpoints(c->g->dev))
 		return gk20a_channel_syncpt_create(c);
 #endif
-	WARN_ON(1);
-	return NULL;
+	return gk20a_channel_semaphore_create(c);
+}
+
+static inline bool gk20a_channel_fence_is_closed(struct gk20a_channel_fence *f)
+{
+	if (f->valid || f->semaphore)
+		return false;
+	return true;
+}
+
+void gk20a_channel_fence_close(struct gk20a_channel_fence *f)
+{
+	if (f->semaphore)
+		gk20a_semaphore_put(f->semaphore);
+	memset(f, 0, sizeof(*f));
+}
+
+void gk20a_channel_fence_dup(struct gk20a_channel_fence *from,
+		struct gk20a_channel_fence *to)
+{
+	WARN_ON(!gk20a_channel_fence_is_closed(to));
+	*to = *from;
+	if (to->semaphore)
+		gk20a_semaphore_get(to->semaphore);
 }
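
[Editor's sketch, not part of the commit] Because a gk20a_channel_fence now owns a semaphore reference, every copy must go through gk20a_channel_fence_dup() and every owner must eventually call gk20a_channel_fence_close(), or pool slots leak. The fragment below sketches that discipline; example_job and both example_* functions are hypothetical, and the job struct is assumed to start zero-initialized (dup warns if the destination is not closed).

/* Hypothetical usage of the fence helpers added above. */
struct example_job {
	struct gk20a_channel_fence pre_fence;
	struct gk20a_channel_fence post_fence;
};

static void example_track_job(struct example_job *job,
		struct gk20a_channel_fence *pre,
		struct gk20a_channel_fence *post)
{
	/* dup takes a semaphore reference, keeping the backing pool
	 * slot alive while the job is in flight. */
	gk20a_channel_fence_dup(pre, &job->pre_fence);
	gk20a_channel_fence_dup(post, &job->post_fence);
}

static void example_job_done(struct example_job *job)
{
	/* close drops the reference so the pool slot can be reused. */
	gk20a_channel_fence_close(&job->pre_fence);
	gk20a_channel_fence_close(&job->post_fence);
}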