diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-08-11 19:40:35 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-16 00:58:36 -0400 |
commit | 9bd76b7fa08672181c2a0cce747a59664e226964 (patch) | |
tree | 3439f5e1354d5e23bcb7da98773f123e815eb9fe /drivers | |
parent | f919aab509aea3753785c2e1481b96d4daf7e4a8 (diff) |
gpu: nvgpu: Optimize sync fence creation
Only create sync-fences in the semaphore synchronization path
when they are actually needed (i.e., requested by userspace).
Bug 1795076
Reviewed-on: http://git-master/r/1201564
(cherry picked from commit dc52d424a839e6c064c02b7f02905dd6a59a50af)
Change-Id: Ieac6aef415678d4ea982683a955897c64959436e
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1221041
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.h | 2 |
3 files changed, 11 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index e4972610..98363c88 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -605,7 +605,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
605 | *fence = gk20a_fence_from_semaphore(sema->timeline, | 605 | *fence = gk20a_fence_from_semaphore(sema->timeline, |
606 | fp_sema, | 606 | fp_sema, |
607 | &c->semaphore_wq, | 607 | &c->semaphore_wq, |
608 | NULL, false); | 608 | NULL, false, false); |
609 | else | 609 | else |
610 | /* | 610 | /* |
611 | * Allocate an empty fence. It will instantly return | 611 | * Allocate an empty fence. It will instantly return |
@@ -676,7 +676,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
676 | semaphore while the job is in flight. */ | 676 | semaphore while the job is in flight. */ |
677 | *fence = gk20a_fence_from_semaphore(sema->timeline, w->sema, | 677 | *fence = gk20a_fence_from_semaphore(sema->timeline, w->sema, |
678 | &c->semaphore_wq, | 678 | &c->semaphore_wq, |
679 | NULL, false); | 679 | NULL, false, false); |
680 | 680 | ||
681 | skip_slow_path: | 681 | skip_slow_path: |
682 | *entry = wait_cmd; | 682 | *entry = wait_cmd; |
@@ -734,7 +734,8 @@ static int __gk20a_channel_semaphore_incr( | |||
734 | 734 | ||
735 | *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, | 735 | *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, |
736 | &c->semaphore_wq, | 736 | &c->semaphore_wq, |
737 | dependency, wfi_cmd); | 737 | dependency, wfi_cmd, |
738 | need_sync_fence); | ||
738 | *entry = incr_cmd; | 739 | *entry = incr_cmd; |
739 | return 0; | 740 | return 0; |
740 | } | 741 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index fbbaa2a7..596dc549 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |||
@@ -148,17 +148,19 @@ struct gk20a_fence *gk20a_fence_from_semaphore( | |||
148 | struct gk20a_semaphore *semaphore, | 148 | struct gk20a_semaphore *semaphore, |
149 | wait_queue_head_t *semaphore_wq, | 149 | wait_queue_head_t *semaphore_wq, |
150 | struct sync_fence *dependency, | 150 | struct sync_fence *dependency, |
151 | bool wfi) | 151 | bool wfi, bool need_sync_fence) |
152 | { | 152 | { |
153 | struct gk20a_fence *f; | 153 | struct gk20a_fence *f; |
154 | struct sync_fence *sync_fence = NULL; | 154 | struct sync_fence *sync_fence = NULL; |
155 | 155 | ||
156 | #ifdef CONFIG_SYNC | 156 | #ifdef CONFIG_SYNC |
157 | sync_fence = gk20a_sync_fence_create(timeline, semaphore, | 157 | if (need_sync_fence) { |
158 | sync_fence = gk20a_sync_fence_create(timeline, semaphore, | ||
158 | dependency, "f-gk20a-0x%04x", | 159 | dependency, "f-gk20a-0x%04x", |
159 | gk20a_semaphore_gpu_ro_va(semaphore)); | 160 | gk20a_semaphore_gpu_ro_va(semaphore)); |
160 | if (!sync_fence) | 161 | if (!sync_fence) |
161 | return NULL; | 162 | return NULL; |
163 | } | ||
162 | #endif | 164 | #endif |
163 | 165 | ||
164 | f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi); | 166 | f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi); |
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h index ff00bcf1..35488ea3 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h | |||
@@ -52,7 +52,7 @@ struct gk20a_fence *gk20a_fence_from_semaphore( | |||
52 | struct gk20a_semaphore *semaphore, | 52 | struct gk20a_semaphore *semaphore, |
53 | wait_queue_head_t *semaphore_wq, | 53 | wait_queue_head_t *semaphore_wq, |
54 | struct sync_fence *dependency, | 54 | struct sync_fence *dependency, |
55 | bool wfi); | 55 | bool wfi, bool need_sync_fence); |
56 | 56 | ||
57 | struct gk20a_fence *gk20a_fence_from_syncpt( | 57 | struct gk20a_fence *gk20a_fence_from_syncpt( |
58 | struct platform_device *host1x_pdev, | 58 | struct platform_device *host1x_pdev, |