summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
diff options
context:
space:
mode:
author: Deepak Nibade <dnibade@nvidia.com> 2015-10-07 06:50:07 -0400
committer: Sachin Nikam <snikam@nvidia.com> 2015-12-08 04:18:04 -0500
commit: 52753b51f1dbf51221d7856a9288aad1ab2d351a (patch)
tree: 70a9dbdba1087797202ec3e1a584408d82947bd9 /drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
parent: 937de14907bbc238d180defc1afe036faa24f1bc (diff)
gpu: nvgpu: create sync_fence only if needed
Currently, we create a sync_fence (from nvhost_sync_create_fence()) for every submit, but not all submits request a sync_fence. Also, the nvhost_sync_create_fence() API takes about 1/3rd of the total submit path time. Hence, to optimize, we allocate a sync_fence only when the user explicitly asks for it using (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET && NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE). Also, in the CDE path from gk20a_prepare_compressible_read(), we reuse the existing fence stored in "state", and that can result in not returning a sync_fence_fd when the user asked for one. Hence, force allocation of a sync_fence when the job submission comes from the CDE path. Bug 200141116 Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/812845 (cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98) Reviewed-on: http://git-master/r/837662 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h8
1 files changed, 5 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index a347cbab..618e1b26 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Channel Synchronization Abstraction 4 * GK20A Channel Synchronization Abstraction
5 * 5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -54,7 +54,8 @@ struct gk20a_channel_sync {
54 */ 54 */
55 int (*incr)(struct gk20a_channel_sync *s, 55 int (*incr)(struct gk20a_channel_sync *s,
56 struct priv_cmd_entry **entry, 56 struct priv_cmd_entry **entry,
57 struct gk20a_fence **fence); 57 struct gk20a_fence **fence,
58 bool need_sync_fence);
58 59
59 /* Increment syncpoint/semaphore, preceded by a wfi. 60 /* Increment syncpoint/semaphore, preceded by a wfi.
60 * Returns 61 * Returns
@@ -76,7 +77,8 @@ struct gk20a_channel_sync {
76 int wait_fence_fd, 77 int wait_fence_fd,
77 struct priv_cmd_entry **entry, 78 struct priv_cmd_entry **entry,
78 struct gk20a_fence **fence, 79 struct gk20a_fence **fence,
79 bool wfi); 80 bool wfi,
81 bool need_sync_fence);
80 82
81 /* Reset the channel syncpoint/semaphore. */ 83 /* Reset the channel syncpoint/semaphore. */
82 void (*set_min_eq_max)(struct gk20a_channel_sync *s); 84 void (*set_min_eq_max)(struct gk20a_channel_sync *s);