diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2015-10-07 06:50:07 -0400 |
---|---|---|
committer | Sachin Nikam <snikam@nvidia.com> | 2015-12-08 04:18:04 -0500 |
commit | 52753b51f1dbf51221d7856a9288aad1ab2d351a (patch) | |
tree | 70a9dbdba1087797202ec3e1a584408d82947bd9 /drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |
parent | 937de14907bbc238d180defc1afe036faa24f1bc (diff) |
gpu: nvgpu: create sync_fence only if needed
Currently, we create sync_fence (from nvhost_sync_create_fence())
for every submit.
But not all submits request a sync_fence.
Also, nvhost_sync_create_fence() API takes about 1/3rd of the total
submit path.
Hence, to optimize, we can allocate a sync_fence
only when the user explicitly asks for it using
(NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
Also, in the CDE path from gk20a_prepare_compressible_read(),
we reuse the existing fence stored in "state", and that can
result in not returning a sync_fence_fd when the user asked
for it.
Hence, force allocation of a sync_fence when the job submission
comes from the CDE path.
Bug 200141116
Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/812845
(cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98)
Reviewed-on: http://git-master/r/837662
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fence_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 54a288cd..ae19d36f 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |||
@@ -194,7 +194,8 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = { | |||
194 | }; | 194 | }; |
195 | 195 | ||
196 | struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, | 196 | struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, |
197 | u32 id, u32 value, bool wfi) | 197 | u32 id, u32 value, bool wfi, |
198 | bool need_sync_fence) | ||
198 | { | 199 | { |
199 | struct gk20a_fence *f; | 200 | struct gk20a_fence *f; |
200 | struct sync_fence *sync_fence = NULL; | 201 | struct sync_fence *sync_fence = NULL; |
@@ -205,10 +206,12 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, | |||
205 | .thresh = value | 206 | .thresh = value |
206 | }; | 207 | }; |
207 | 208 | ||
208 | sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1, | 209 | if (need_sync_fence) { |
209 | "fence"); | 210 | sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1, |
210 | if (IS_ERR(sync_fence)) | 211 | "fence"); |
211 | return NULL; | 212 | if (IS_ERR(sync_fence)) |
213 | return NULL; | ||
214 | } | ||
212 | #endif | 215 | #endif |
213 | 216 | ||
214 | f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi); | 217 | f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi); |