diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2015-10-07 06:50:07 -0400 |
---|---|---|
committer | Sachin Nikam <snikam@nvidia.com> | 2015-12-08 04:18:04 -0500 |
commit | 52753b51f1dbf51221d7856a9288aad1ab2d351a (patch) | |
tree | 70a9dbdba1087797202ec3e1a584408d82947bd9 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | 937de14907bbc238d180defc1afe036faa24f1bc (diff) |
gpu: nvgpu: create sync_fence only if needed
Currently, we create sync_fence (from nvhost_sync_create_fence())
for every submit.
But not all submits request a sync_fence.
Also, nvhost_sync_create_fence() API takes about 1/3rd of the total
submit path.
Hence, to optimize, we can allocate a sync_fence
only when the user explicitly asks for it using
(NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
Also, in CDE path from gk20a_prepare_compressible_read(),
we reuse the existing fence stored in "state" and that can
result in not returning a sync_fence_fd when the user asked
for it.
Hence, force allocation of a sync_fence when the job submission
comes from the CDE path.
Bug 200141116
Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/812845
(cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98)
Reviewed-on: http://git-master/r/837662
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 20 |
1 files changed, 16 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 59c3e31d..98c8760e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1830,7 +1830,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1830 | u32 num_entries, | 1830 | u32 num_entries, |
1831 | u32 flags, | 1831 | u32 flags, |
1832 | struct nvgpu_fence *fence, | 1832 | struct nvgpu_fence *fence, |
1833 | struct gk20a_fence **fence_out) | 1833 | struct gk20a_fence **fence_out, |
1834 | bool force_need_sync_fence) | ||
1834 | { | 1835 | { |
1835 | struct gk20a *g = c->g; | 1836 | struct gk20a *g = c->g; |
1836 | struct device *d = dev_from_gk20a(g); | 1837 | struct device *d = dev_from_gk20a(g); |
@@ -1848,6 +1849,14 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1848 | struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va; | 1849 | struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va; |
1849 | bool skip_buffer_refcounting = (flags & | 1850 | bool skip_buffer_refcounting = (flags & |
1850 | NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); | 1851 | NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); |
1852 | bool need_sync_fence = false; | ||
1853 | |||
1854 | /* | ||
1855 | * If user wants to allocate sync_fence_fd always, then respect that; | ||
1856 | * otherwise, allocate sync_fence_fd based on user flags only | ||
1857 | */ | ||
1858 | if (force_need_sync_fence) | ||
1859 | need_sync_fence = true; | ||
1851 | 1860 | ||
1852 | if (c->has_timedout) | 1861 | if (c->has_timedout) |
1853 | return -ETIMEDOUT; | 1862 | return -ETIMEDOUT; |
@@ -1970,15 +1979,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1970 | goto clean_up; | 1979 | goto clean_up; |
1971 | } | 1980 | } |
1972 | 1981 | ||
1982 | if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) && | ||
1983 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) | ||
1984 | need_sync_fence = true; | ||
1973 | 1985 | ||
1974 | /* always insert syncpt increment at end of gpfifo submission | 1986 | /* always insert syncpt increment at end of gpfifo submission |
1975 | to keep track of method completion for idle railgating */ | 1987 | to keep track of method completion for idle railgating */ |
1976 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | 1988 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) |
1977 | err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd, | 1989 | err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd, |
1978 | &post_fence, need_wfi); | 1990 | &post_fence, need_wfi, need_sync_fence); |
1979 | else | 1991 | else |
1980 | err = c->sync->incr(c->sync, &incr_cmd, | 1992 | err = c->sync->incr(c->sync, &incr_cmd, |
1981 | &post_fence); | 1993 | &post_fence, need_sync_fence); |
1982 | if (err) { | 1994 | if (err) { |
1983 | mutex_unlock(&c->submit_lock); | 1995 | mutex_unlock(&c->submit_lock); |
1984 | goto clean_up; | 1996 | goto clean_up; |
@@ -2578,7 +2590,7 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
2578 | 2590 | ||
2579 | ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, | 2591 | ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, |
2580 | args->flags, &args->fence, | 2592 | args->flags, &args->fence, |
2581 | &fence_out); | 2593 | &fence_out, false); |
2582 | 2594 | ||
2583 | if (ret) | 2595 | if (ret) |
2584 | goto clean_up; | 2596 | goto clean_up; |