Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  175
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.c            90
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.h             6
3 files changed, 123 insertions, 148 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 4b1be8b9..c6b55bf8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -502,10 +502,10 @@ static void gk20a_channel_semaphore_launcher(
 
 static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 		  struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
-		  int cmd_size, bool acquire, bool wfi)
+		  u32 offset, bool acquire, bool wfi)
 {
 	int ch = c->chid;
-	u32 ob, off = cmd->off;
+	u32 ob, off = cmd->off + offset;
 	u64 va;
 
 	ob = off;
@@ -588,108 +588,79 @@ static int gk20a_channel_semaphore_wait_syncpt(
 }
 
 #ifdef CONFIG_SYNC
-/*
- * Attempt a fast path for waiting on a sync_fence. Basically if the passed
- * sync_fence is backed by a nvgpu_semaphore then there's no reason to go
- * through the rigmarole of setting up a separate semaphore which waits on an
- * interrupt from the GPU and then triggers a worker thread to execute a SW
- * based semaphore release. Instead just have the GPU wait on the same semaphore
- * that is going to be incremented by the GPU.
- *
- * This function returns 2 possible values: -ENODEV or 0 on success. In the case
- * of -ENODEV the fastpath cannot be taken due to the fence not being backed by
- * a GPU semaphore.
- */
-static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
-			struct sync_fence *fence,
-			struct priv_cmd_entry *wait_cmd,
-			struct nvgpu_semaphore **fp_sema)
+static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd,
+		struct priv_cmd_entry *wait_cmd)
 {
-	struct nvgpu_semaphore *sema;
+	struct sync_fence *sync_fence;
 	int err;
+	const int wait_cmd_size = 8;
+	int num_wait_cmds;
+	int i;
 
-	if (!gk20a_is_sema_backed_sync_fence(fence))
-		return -ENODEV;
-
-	sema = gk20a_sync_fence_get_sema(fence);
+	sync_fence = gk20a_sync_fence_fdget(fd);
+	if (!sync_fence)
+		return -EINVAL;
 
-	/*
-	 * If there's no underlying sema then that means the underlying sema has
-	 * already signaled.
-	 */
-	if (!sema) {
-		*fp_sema = NULL;
-		return 0;
+	num_wait_cmds = sync_fence->num_fences;
+	if (num_wait_cmds == 0) {
+		err = 0;
+		goto put_fence;
 	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
-	if (err)
-		return err;
+	err = gk20a_channel_alloc_priv_cmdbuf(c,
+			wait_cmd_size * num_wait_cmds,
+			wait_cmd);
+	if (err) {
+		nvgpu_err(c->g, "not enough priv cmd buffer space");
+		goto put_fence;
+	}
 
-	nvgpu_semaphore_get(sema);
-	BUG_ON(!sema->incremented);
-	add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
+	for (i = 0; i < sync_fence->num_fences; i++) {
+		struct fence *f = sync_fence->cbs[i].sync_pt;
+		struct sync_pt *pt = sync_pt_from_fence(f);
+		struct nvgpu_semaphore *sema;
 
-	/*
-	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
-	 * fence with the underlying semaphore.
-	 */
-	*fp_sema = sema;
+		sema = gk20a_sync_pt_sema(pt);
+		if (!sema) {
+			/* expired */
+			nvgpu_memset(c->g, wait_cmd->mem,
+				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
+				0, wait_cmd_size * sizeof(u32));
+		} else {
+			WARN_ON(!sema->incremented);
+			add_sema_cmd(c->g, c, sema, wait_cmd,
+				i * wait_cmd_size, true, false);
+			nvgpu_semaphore_put(sema);
+		}
+	}
 
-	return 0;
+put_fence:
+	sync_fence_put(sync_fence);
+	return err;
 }
-#endif
 
-static int gk20a_channel_semaphore_wait_fd(
-		struct gk20a_channel_sync *s, int fd,
-		struct priv_cmd_entry *entry,
-		struct gk20a_fence *fence)
+static int semaphore_wait_fd_proxy(struct channel_gk20a *c, int fd,
+		struct priv_cmd_entry *wait_cmd,
+		struct gk20a_fence *fence_out,
+		struct sync_timeline *timeline)
 {
-	struct gk20a_channel_semaphore *sema =
-		container_of(s, struct gk20a_channel_semaphore, ops);
-	struct channel_gk20a *c = sema->c;
-#ifdef CONFIG_SYNC
-	struct nvgpu_semaphore *fp_sema;
+	const int wait_cmd_size = 8;
 	struct sync_fence *sync_fence;
-	struct priv_cmd_entry *wait_cmd = entry;
 	struct wait_fence_work *w = NULL;
-	int err, ret, status;
+	int err, status;
 
-	sync_fence = gk20a_sync_fence_fdget(fd);
+	sync_fence = sync_fence_fdget(fd);
 	if (!sync_fence)
 		return -EINVAL;
 
-	ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
-	if (ret == 0) {
-		if (fp_sema) {
-			err = gk20a_fence_from_semaphore(c->g, fence,
-					sema->timeline,
-					fp_sema,
-					&c->semaphore_wq,
-					false);
-			if (err) {
-				nvgpu_semaphore_put(fp_sema);
-				goto clean_up_priv_cmd;
-			}
-		} else
-			/*
-			 * Init an empty fence. It will instantly return
-			 * from gk20a_fence_wait().
-			 */
-			gk20a_init_fence(fence, NULL, NULL);
-
-		sync_fence_put(sync_fence);
-		goto skip_slow_path;
-	}
-
 	/* If the fence has signaled there is no reason to wait on it. */
 	status = atomic_read(&sync_fence->status);
 	if (status == 0) {
 		sync_fence_put(sync_fence);
-		goto skip_slow_path;
+		return 0;
 	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, wait_cmd_size, wait_cmd);
 	if (err) {
 		nvgpu_err(c->g,
 			"not enough priv cmd buffer space");
@@ -718,34 +689,34 @@ static int gk20a_channel_semaphore_wait_fd(
 	nvgpu_semaphore_incr(w->sema, c->hw_sema);
 
 	/* GPU unblocked when the semaphore value increments. */
-	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
+	add_sema_cmd(c->g, c, w->sema, wait_cmd, 0, true, false);
 
 	/*
 	 * We need to create the fence before adding the waiter to ensure
 	 * that we properly clean up in the event the sync_fence has
 	 * already signaled
 	 */
-	err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
-			&c->semaphore_wq, false);
+	err = gk20a_fence_from_semaphore(c->g, fence_out, timeline,
+			w->sema, &c->semaphore_wq, false);
 	if (err)
 		goto clean_up_sema;
 
-	ret = sync_fence_wait_async(sync_fence, &w->waiter);
+	err = sync_fence_wait_async(sync_fence, &w->waiter);
 	gk20a_add_pending_sema_wait(c->g, w);
 
 	/*
-	 * If the sync_fence has already signaled then the above async_wait
-	 * will never trigger. This causes the semaphore release op to never
-	 * happen which, in turn, hangs the GPU. That's bad. So let's just
-	 * do the nvgpu_semaphore_release() right now.
+	 * If the sync_fence has already signaled then the above wait_async
+	 * will not get scheduled; the fence completed just after doing the
+	 * status check above before allocs and waiter init, and won the race.
+	 * This causes the waiter to be skipped, so let's release the semaphore
+	 * here and put the refs taken for the worker.
 	 */
-	if (ret == 1) {
+	if (err == 1) {
 		sync_fence_put(sync_fence);
 		nvgpu_semaphore_release(w->sema, c->hw_sema);
 		nvgpu_semaphore_put(w->sema);
 	}
 
-skip_slow_path:
 	return 0;
 
 clean_up_sema:
@@ -758,10 +729,28 @@ clean_up_sema:
 clean_up_worker:
 	nvgpu_kfree(c->g, w);
 clean_up_priv_cmd:
-	gk20a_free_priv_cmdbuf(c, entry);
+	gk20a_free_priv_cmdbuf(c, wait_cmd);
 clean_up_sync_fence:
 	sync_fence_put(sync_fence);
 	return err;
+}
+#endif
+
+static int gk20a_channel_semaphore_wait_fd(
+		struct gk20a_channel_sync *s, int fd,
+		struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sema->c;
+#ifdef CONFIG_SYNC
+	int err;
+
+	err = semaphore_wait_fd_native(c, fd, entry);
+	if (err)
+		err = semaphore_wait_fd_proxy(c, fd, entry, fence, sema->timeline);
+	return err;
 #else
 	nvgpu_err(c->g,
 		"trying to use sync fds with CONFIG_SYNC disabled");
@@ -798,7 +787,7 @@ static int __gk20a_channel_semaphore_incr(
 	}
 
 	/* Release the completion semaphore. */
-	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
+	add_sema_cmd(c->g, c, semaphore, incr_cmd, 0, false, wfi_cmd);
 
 	err = gk20a_fence_from_semaphore(c->g, fence,
 			sp->timeline, semaphore,
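For reference, a minimal standalone sketch of the slot layout that the new semaphore_wait_fd_native() path relies on: one priv cmdbuf allocation of wait_cmd_size * num_wait_cmds words, fence i owning the slot at off + i * wait_cmd_size, and an already-expired fence's slot zero-filled so it acts as a no-op. All names here (WAIT_CMD_SIZE, struct cmdbuf, emit_wait_slot, clear_wait_slot) are hypothetical illustration only, not nvgpu code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define WAIT_CMD_SIZE 8u		/* words per semaphore-acquire slot */
#define NUM_FENCES    3u

/* Hypothetical stand-in for a priv cmd buffer: a flat array of 32-bit words. */
struct cmdbuf {
	uint32_t words[WAIT_CMD_SIZE * NUM_FENCES];
	uint32_t off;			/* base offset of this allocation, in words */
};

/* Fill fence i's slot with placeholder words; a real driver would emit the
 * semaphore-acquire method stream here instead. */
static void emit_wait_slot(struct cmdbuf *cb, uint32_t i, uint32_t sema_id)
{
	uint32_t base = cb->off + i * WAIT_CMD_SIZE;
	uint32_t w;

	for (w = 0; w < WAIT_CMD_SIZE; w++)
		cb->words[base + w] = (sema_id << 16) | w;
}

/* Fence i has already signaled: zero its slot so the GPU sees a no-op. */
static void clear_wait_slot(struct cmdbuf *cb, uint32_t i)
{
	memset(&cb->words[cb->off + i * WAIT_CMD_SIZE], 0,
	       WAIT_CMD_SIZE * sizeof(uint32_t));
}

int main(void)
{
	struct cmdbuf cb = { .off = 0 };
	int expired[NUM_FENCES] = { 0, 1, 0 };	/* pretend fence 1 already signaled */
	uint32_t i;

	for (i = 0; i < NUM_FENCES; i++) {
		if (expired[i])
			clear_wait_slot(&cb, i);
		else
			emit_wait_slot(&cb, i, 0x10u + i);
	}

	printf("fence 0 slot starts at word %u, fence 2 slot at word %u\n",
	       cb.off + 0 * WAIT_CMD_SIZE, cb.off + 2 * WAIT_CMD_SIZE);
	return 0;
}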
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index f6d16b90..a8600bce 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Sync Framework Integration
  *
- * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -70,55 +70,6 @@ struct gk20a_sync_pt_inst {
 };
 
 /**
- * Check if the passed sync_fence is backed by a single GPU semaphore. In such
- * cases we can short circuit a lot of SW involved in signaling pre-fences and
- * post fences.
- *
- * For now reject multi-sync_pt fences. This could be changed in future. It
- * would require that the sema fast path push a sema acquire for each semaphore
- * in the fence.
- */
-int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence)
-{
-	struct sync_timeline *t;
-
-	struct fence *pt = fence->cbs[0].sync_pt;
-	struct sync_pt *spt = sync_pt_from_fence(pt);
-
-	if (fence->num_fences != 1)
-		return 0;
-
-	if (spt == NULL)
-		return 0;
-
-	t = sync_pt_parent(spt);
-
-	if (t->ops == &gk20a_sync_timeline_ops)
-		return 1;
-	return 0;
-}
-
-struct nvgpu_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f)
-{
-	struct sync_pt *spt;
-	struct gk20a_sync_pt_inst *pti;
-
-	struct fence *pt;
-
-	if (!f)
-		return NULL;
-
-	if (!gk20a_is_sema_backed_sync_fence(f))
-		return NULL;
-
-	pt = f->cbs[0].sync_pt;
-	spt = sync_pt_from_fence(pt);
-	pti = container_of(spt, struct gk20a_sync_pt_inst, pt);
-
-	return pti->shared->sema;
-}
-
-/**
  * Compares sync pt values a and b, both of which will trigger either before
  * or after ref (i.e. a and b trigger before ref, or a and b trigger after
  * ref). Supplying ref allows us to handle wrapping correctly.
@@ -371,7 +322,44 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
 
 struct sync_fence *gk20a_sync_fence_fdget(int fd)
 {
-	return sync_fence_fdget(fd);
+	struct sync_fence *fence = sync_fence_fdget(fd);
+	int i;
+
+	if (!fence)
+		return NULL;
+
+	for (i = 0; i < fence->num_fences; i++) {
+		struct fence *pt = fence->cbs[i].sync_pt;
+		struct sync_pt *spt = sync_pt_from_fence(pt);
+		struct sync_timeline *t;
+
+		if (spt == NULL) {
+			sync_fence_put(fence);
+			return NULL;
+		}
+
+		t = sync_pt_parent(spt);
+		if (t->ops != &gk20a_sync_timeline_ops) {
+			sync_fence_put(fence);
+			return NULL;
+		}
+	}
+
+	return fence;
+}
+
+struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
+	struct nvgpu_semaphore *sema;
+
+	nvgpu_spinlock_acquire(&pt->lock);
+	sema = pt->sema;
+	if (sema)
+		nvgpu_semaphore_get(sema);
+	nvgpu_spinlock_release(&pt->lock);
+
+	return sema;
 }
 
 void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
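As a usage note, here is a condensed sketch of how the two exports above are meant to be consumed, mirroring the loop in semaphore_wait_fd_native() with error handling trimmed: gk20a_sync_fence_fdget() now returns the fence only when every sync_pt belongs to a gk20a timeline, and gk20a_sync_pt_sema() returns a referenced semaphore or NULL once the point has expired, so each lookup is balanced with nvgpu_semaphore_put() and the fence with sync_fence_put(). The wrapper name walk_fence_semas and the elided emit step are made up for the illustration; everything else comes from this change.

/* Sketch only: condensed from semaphore_wait_fd_native() in this change. */
static int walk_fence_semas(struct channel_gk20a *c, int fd)
{
	struct sync_fence *f = gk20a_sync_fence_fdget(fd);
	int i;

	if (!f)
		return -EINVAL;	/* not (entirely) backed by gk20a sync_pts */

	for (i = 0; i < f->num_fences; i++) {
		struct sync_pt *pt = sync_pt_from_fence(f->cbs[i].sync_pt);
		struct nvgpu_semaphore *sema = gk20a_sync_pt_sema(pt);

		if (!sema)
			continue;	/* point already expired, nothing to wait on */

		/* ... emit a semaphore acquire for sema here ... */

		nvgpu_semaphore_put(sema);	/* drop the ref taken under pt->lock */
	}

	sync_fence_put(f);	/* balances gk20a_sync_fence_fdget() */
	return 0;
}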
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index 7d7aff6d..8a6439ab 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -3,7 +3,7 @@
  *
  * GK20A Sync Framework Integration
  *
- * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -33,9 +33,6 @@ struct sync_pt;
 struct nvgpu_semaphore;
 struct fence;
 
-int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence);
-struct nvgpu_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f);
-
 #ifdef CONFIG_SYNC
 struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...);
 void gk20a_sync_timeline_destroy(struct sync_timeline *);
@@ -46,6 +43,7 @@ struct sync_fence *gk20a_sync_fence_create(
 		struct nvgpu_semaphore *,
 		const char *fmt, ...);
 struct sync_fence *gk20a_sync_fence_fdget(int fd);
+struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt);
 #else
 static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {}
 static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {}