summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2015-10-07 06:50:07 -0400
committerSachin Nikam <snikam@nvidia.com>2015-12-08 04:18:04 -0500
commit52753b51f1dbf51221d7856a9288aad1ab2d351a (patch)
tree70a9dbdba1087797202ec3e1a584408d82947bd9 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent937de14907bbc238d180defc1afe036faa24f1bc (diff)
gpu: nvgpu: create sync_fence only if needed
Currently, we create a sync_fence (from nvhost_sync_create_fence()) for every submit, but not all submits request a sync_fence. Also, the nvhost_sync_create_fence() API takes about 1/3rd of the total submit path. Hence, to optimize, we can allocate a sync_fence only when the user explicitly asks for it using (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET && NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE). Also, in the CDE path from gk20a_prepare_compressible_read(), we reuse the existing fence stored in "state", which can result in not returning a sync_fence_fd when the user asked for it. Hence, force allocation of a sync_fence when the job submission comes from the CDE path. Bug 200141116 Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/812845 (cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98) Reviewed-on: http://git-master/r/837662 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c20
1 file changed, 16 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 59c3e31d..98c8760e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1830,7 +1830,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1830 u32 num_entries, 1830 u32 num_entries,
1831 u32 flags, 1831 u32 flags,
1832 struct nvgpu_fence *fence, 1832 struct nvgpu_fence *fence,
1833 struct gk20a_fence **fence_out) 1833 struct gk20a_fence **fence_out,
1834 bool force_need_sync_fence)
1834{ 1835{
1835 struct gk20a *g = c->g; 1836 struct gk20a *g = c->g;
1836 struct device *d = dev_from_gk20a(g); 1837 struct device *d = dev_from_gk20a(g);
@@ -1848,6 +1849,14 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1848 struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va; 1849 struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
1849 bool skip_buffer_refcounting = (flags & 1850 bool skip_buffer_refcounting = (flags &
1850 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); 1851 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1852 bool need_sync_fence = false;
1853
1854 /*
1855 * If user wants to allocate sync_fence_fd always, then respect that;
1856 * otherwise, allocate sync_fence_fd based on user flags only
1857 */
1858 if (force_need_sync_fence)
1859 need_sync_fence = true;
1851 1860
1852 if (c->has_timedout) 1861 if (c->has_timedout)
1853 return -ETIMEDOUT; 1862 return -ETIMEDOUT;
@@ -1970,15 +1979,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1970 goto clean_up; 1979 goto clean_up;
1971 } 1980 }
1972 1981
1982 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
1983 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
1984 need_sync_fence = true;
1973 1985
1974 /* always insert syncpt increment at end of gpfifo submission 1986 /* always insert syncpt increment at end of gpfifo submission
1975 to keep track of method completion for idle railgating */ 1987 to keep track of method completion for idle railgating */
1976 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) 1988 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1977 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd, 1989 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1978 &post_fence, need_wfi); 1990 &post_fence, need_wfi, need_sync_fence);
1979 else 1991 else
1980 err = c->sync->incr(c->sync, &incr_cmd, 1992 err = c->sync->incr(c->sync, &incr_cmd,
1981 &post_fence); 1993 &post_fence, need_sync_fence);
1982 if (err) { 1994 if (err) {
1983 mutex_unlock(&c->submit_lock); 1995 mutex_unlock(&c->submit_lock);
1984 goto clean_up; 1996 goto clean_up;
@@ -2578,7 +2590,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
2578 2590
2579 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, 2591 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
2580 args->flags, &args->fence, 2592 args->flags, &args->fence,
2581 &fence_out); 2593 &fence_out, false);
2582 2594
2583 if (ret) 2595 if (ret)
2584 goto clean_up; 2596 goto clean_up;