diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2015-10-07 06:50:07 -0400 |
---|---|---|
committer | Sachin Nikam <snikam@nvidia.com> | 2015-12-08 04:18:04 -0500 |
commit | 52753b51f1dbf51221d7856a9288aad1ab2d351a (patch) | |
tree | 70a9dbdba1087797202ec3e1a584408d82947bd9 /drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |
parent | 937de14907bbc238d180defc1afe036faa24f1bc (diff) |
gpu: nvgpu: create sync_fence only if needed
Currently, we create a sync_fence (via nvhost_sync_create_fence())
for every submit.
But not all submits request a sync_fence.
Also, the nvhost_sync_create_fence() API accounts for about 1/3rd of the
total submit path time.
Hence, to optimize, we can allocate a sync_fence
only when the user explicitly asks for it using
(NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE).
Also, in the CDE path from gk20a_prepare_compressible_read(),
we reuse the existing fence stored in "state", and that can
result in not returning a sync_fence_fd when the user asked
for it.
Hence, force allocation of a sync_fence when the job submission
comes from the CDE path.
Bug 200141116
Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/812845
(cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98)
Reviewed-on: http://git-master/r/837662
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 32 |
1 files changed, 19 insertions, 13 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 95647774..c0c8ec6d 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -166,7 +166,8 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | |||
166 | bool wfi_cmd, | 166 | bool wfi_cmd, |
167 | bool register_irq, | 167 | bool register_irq, |
168 | struct priv_cmd_entry **entry, | 168 | struct priv_cmd_entry **entry, |
169 | struct gk20a_fence **fence) | 169 | struct gk20a_fence **fence, |
170 | bool need_sync_fence) | ||
170 | { | 171 | { |
171 | u32 thresh; | 172 | u32 thresh; |
172 | int incr_cmd_size; | 173 | int incr_cmd_size; |
@@ -239,7 +240,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | |||
239 | } | 240 | } |
240 | 241 | ||
241 | *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, | 242 | *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, |
242 | wfi_cmd); | 243 | wfi_cmd, need_sync_fence); |
243 | *entry = incr_cmd; | 244 | *entry = incr_cmd; |
244 | return 0; | 245 | return 0; |
245 | } | 246 | } |
@@ -251,33 +252,35 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, | |||
251 | return __gk20a_channel_syncpt_incr(s, | 252 | return __gk20a_channel_syncpt_incr(s, |
252 | true /* wfi */, | 253 | true /* wfi */, |
253 | false /* no irq handler */, | 254 | false /* no irq handler */, |
254 | entry, fence); | 255 | entry, fence, true); |
255 | } | 256 | } |
256 | 257 | ||
257 | static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | 258 | static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, |
258 | struct priv_cmd_entry **entry, | 259 | struct priv_cmd_entry **entry, |
259 | struct gk20a_fence **fence) | 260 | struct gk20a_fence **fence, |
261 | bool need_sync_fence) | ||
260 | { | 262 | { |
261 | /* Don't put wfi cmd to this one since we're not returning | 263 | /* Don't put wfi cmd to this one since we're not returning |
262 | * a fence to user space. */ | 264 | * a fence to user space. */ |
263 | return __gk20a_channel_syncpt_incr(s, | 265 | return __gk20a_channel_syncpt_incr(s, |
264 | false /* no wfi */, | 266 | false /* no wfi */, |
265 | true /* register irq */, | 267 | true /* register irq */, |
266 | entry, fence); | 268 | entry, fence, need_sync_fence); |
267 | } | 269 | } |
268 | 270 | ||
269 | static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s, | 271 | static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s, |
270 | int wait_fence_fd, | 272 | int wait_fence_fd, |
271 | struct priv_cmd_entry **entry, | 273 | struct priv_cmd_entry **entry, |
272 | struct gk20a_fence **fence, | 274 | struct gk20a_fence **fence, |
273 | bool wfi) | 275 | bool wfi, |
276 | bool need_sync_fence) | ||
274 | { | 277 | { |
275 | /* Need to do 'wfi + host incr' since we return the fence | 278 | /* Need to do 'wfi + host incr' since we return the fence |
276 | * to user space. */ | 279 | * to user space. */ |
277 | return __gk20a_channel_syncpt_incr(s, | 280 | return __gk20a_channel_syncpt_incr(s, |
278 | wfi, | 281 | wfi, |
279 | true /* register irq */, | 282 | true /* register irq */, |
280 | entry, fence); | 283 | entry, fence, need_sync_fence); |
281 | } | 284 | } |
282 | 285 | ||
283 | static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s) | 286 | static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s) |
@@ -513,7 +516,8 @@ static int __gk20a_channel_semaphore_incr( | |||
513 | struct gk20a_channel_sync *s, bool wfi_cmd, | 516 | struct gk20a_channel_sync *s, bool wfi_cmd, |
514 | struct sync_fence *dependency, | 517 | struct sync_fence *dependency, |
515 | struct priv_cmd_entry **entry, | 518 | struct priv_cmd_entry **entry, |
516 | struct gk20a_fence **fence) | 519 | struct gk20a_fence **fence, |
520 | bool need_sync_fence) | ||
517 | { | 521 | { |
518 | u64 va; | 522 | u64 va; |
519 | int incr_cmd_size; | 523 | int incr_cmd_size; |
@@ -560,18 +564,19 @@ static int gk20a_channel_semaphore_incr_wfi( | |||
560 | return __gk20a_channel_semaphore_incr(s, | 564 | return __gk20a_channel_semaphore_incr(s, |
561 | true /* wfi */, | 565 | true /* wfi */, |
562 | NULL, | 566 | NULL, |
563 | entry, fence); | 567 | entry, fence, true); |
564 | } | 568 | } |
565 | 569 | ||
566 | static int gk20a_channel_semaphore_incr( | 570 | static int gk20a_channel_semaphore_incr( |
567 | struct gk20a_channel_sync *s, | 571 | struct gk20a_channel_sync *s, |
568 | struct priv_cmd_entry **entry, | 572 | struct priv_cmd_entry **entry, |
569 | struct gk20a_fence **fence) | 573 | struct gk20a_fence **fence, |
574 | bool need_sync_fence) | ||
570 | { | 575 | { |
571 | /* Don't put wfi cmd to this one since we're not returning | 576 | /* Don't put wfi cmd to this one since we're not returning |
572 | * a fence to user space. */ | 577 | * a fence to user space. */ |
573 | return __gk20a_channel_semaphore_incr(s, false /* no wfi */, | 578 | return __gk20a_channel_semaphore_incr(s, false /* no wfi */, |
574 | NULL, entry, fence); | 579 | NULL, entry, fence, need_sync_fence); |
575 | } | 580 | } |
576 | 581 | ||
577 | static int gk20a_channel_semaphore_incr_user( | 582 | static int gk20a_channel_semaphore_incr_user( |
@@ -579,7 +584,8 @@ static int gk20a_channel_semaphore_incr_user( | |||
579 | int wait_fence_fd, | 584 | int wait_fence_fd, |
580 | struct priv_cmd_entry **entry, | 585 | struct priv_cmd_entry **entry, |
581 | struct gk20a_fence **fence, | 586 | struct gk20a_fence **fence, |
582 | bool wfi) | 587 | bool wfi, |
588 | bool need_sync_fence) | ||
583 | { | 589 | { |
584 | #ifdef CONFIG_SYNC | 590 | #ifdef CONFIG_SYNC |
585 | struct sync_fence *dependency = NULL; | 591 | struct sync_fence *dependency = NULL; |
@@ -592,7 +598,7 @@ static int gk20a_channel_semaphore_incr_user( | |||
592 | } | 598 | } |
593 | 599 | ||
594 | err = __gk20a_channel_semaphore_incr(s, wfi, dependency, | 600 | err = __gk20a_channel_semaphore_incr(s, wfi, dependency, |
595 | entry, fence); | 601 | entry, fence, need_sync_fence); |
596 | if (err) { | 602 | if (err) { |
597 | if (dependency) | 603 | if (dependency) |
598 | sync_fence_put(dependency); | 604 | sync_fence_put(dependency); |