diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2015-10-07 06:50:07 -0400 |
---|---|---|
committer | Sachin Nikam <snikam@nvidia.com> | 2015-12-08 04:18:04 -0500 |
commit | 52753b51f1dbf51221d7856a9288aad1ab2d351a (patch) | |
tree | 70a9dbdba1087797202ec3e1a584408d82947bd9 /drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |
parent | 937de14907bbc238d180defc1afe036faa24f1bc (diff) |
gpu: nvgpu: create sync_fence only if needed
Currently, we create a sync_fence (via nvhost_sync_create_fence())
for every submit.
However, not all submits request a sync_fence.
Also, the nvhost_sync_create_fence() API accounts for about 1/3rd of the
total time spent in the submit path.
Hence, as an optimization, we allocate a sync_fence
only when the user explicitly asks for it by setting both flags
(NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE).
Also, in the CDE path from gk20a_prepare_compressible_read(),
we reuse the existing fence stored in "state", and that can
result in not returning a sync_fence_fd when the user asked
for it.
Hence, force allocation of a sync_fence when the job submission
comes from the CDE path.
Bug 200141116
Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/812845
(cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98)
Reviewed-on: http://git-master/r/837662
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 8 |
1 files changed, 5 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index a347cbab..618e1b26 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A Channel Synchronization Abstraction | 4 | * GK20A Channel Synchronization Abstraction |
5 | * | 5 | * |
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -54,7 +54,8 @@ struct gk20a_channel_sync { | |||
54 | */ | 54 | */ |
55 | int (*incr)(struct gk20a_channel_sync *s, | 55 | int (*incr)(struct gk20a_channel_sync *s, |
56 | struct priv_cmd_entry **entry, | 56 | struct priv_cmd_entry **entry, |
57 | struct gk20a_fence **fence); | 57 | struct gk20a_fence **fence, |
58 | bool need_sync_fence); | ||
58 | 59 | ||
59 | /* Increment syncpoint/semaphore, preceded by a wfi. | 60 | /* Increment syncpoint/semaphore, preceded by a wfi. |
60 | * Returns | 61 | * Returns |
@@ -76,7 +77,8 @@ struct gk20a_channel_sync { | |||
76 | int wait_fence_fd, | 77 | int wait_fence_fd, |
77 | struct priv_cmd_entry **entry, | 78 | struct priv_cmd_entry **entry, |
78 | struct gk20a_fence **fence, | 79 | struct gk20a_fence **fence, |
79 | bool wfi); | 80 | bool wfi, |
81 | bool need_sync_fence); | ||
80 | 82 | ||
81 | /* Reset the channel syncpoint/semaphore. */ | 83 | /* Reset the channel syncpoint/semaphore. */ |
82 | void (*set_min_eq_max)(struct gk20a_channel_sync *s); | 84 | void (*set_min_eq_max)(struct gk20a_channel_sync *s); |