summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2015-10-07 06:50:07 -0400
committerSachin Nikam <snikam@nvidia.com>2015-12-08 04:18:04 -0500
commit52753b51f1dbf51221d7856a9288aad1ab2d351a (patch)
tree70a9dbdba1087797202ec3e1a584408d82947bd9 /drivers/gpu
parent937de14907bbc238d180defc1afe036faa24f1bc (diff)
gpu: nvgpu: create sync_fence only if needed
Currently, we create a sync_fence (from nvhost_sync_create_fence()) for every submit. But not all submits request a sync_fence. Also, the nvhost_sync_create_fence() API takes about 1/3rd of the total submit path. Hence, to optimize, we can allocate a sync_fence only when the user explicitly asks for it using (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET && NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE). Also, in the CDE path from gk20a_prepare_compressible_read(), we reuse the existing fence stored in "state", and that can result in not returning a sync_fence_fd when the user asked for it. Hence, force allocation of a sync_fence when the job submission comes from the CDE path. Bug 200141116 Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/812845 (cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98) Reviewed-on: http://git-master/r/837662 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.c2
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c20
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.h3
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c32
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gk20a/fence_gk20a.c13
-rw-r--r--drivers/gpu/nvgpu/gk20a/fence_gk20a.h5
7 files changed, 54 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index e1edec2a..a2f7e7a4 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -724,7 +724,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
724 } 724 }
725 725
726 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, 726 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
727 num_entries, flags, fence, fence_out); 727 num_entries, flags, fence, fence_out, true);
728} 728}
729 729
730static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) 730static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 59c3e31d..98c8760e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1830,7 +1830,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1830 u32 num_entries, 1830 u32 num_entries,
1831 u32 flags, 1831 u32 flags,
1832 struct nvgpu_fence *fence, 1832 struct nvgpu_fence *fence,
1833 struct gk20a_fence **fence_out) 1833 struct gk20a_fence **fence_out,
1834 bool force_need_sync_fence)
1834{ 1835{
1835 struct gk20a *g = c->g; 1836 struct gk20a *g = c->g;
1836 struct device *d = dev_from_gk20a(g); 1837 struct device *d = dev_from_gk20a(g);
@@ -1848,6 +1849,14 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1848 struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va; 1849 struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
1849 bool skip_buffer_refcounting = (flags & 1850 bool skip_buffer_refcounting = (flags &
1850 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); 1851 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1852 bool need_sync_fence = false;
1853
1854 /*
1855 * If user wants to allocate sync_fence_fd always, then respect that;
1856 * otherwise, allocate sync_fence_fd based on user flags only
1857 */
1858 if (force_need_sync_fence)
1859 need_sync_fence = true;
1851 1860
1852 if (c->has_timedout) 1861 if (c->has_timedout)
1853 return -ETIMEDOUT; 1862 return -ETIMEDOUT;
@@ -1970,15 +1979,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1970 goto clean_up; 1979 goto clean_up;
1971 } 1980 }
1972 1981
1982 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
1983 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
1984 need_sync_fence = true;
1973 1985
1974 /* always insert syncpt increment at end of gpfifo submission 1986 /* always insert syncpt increment at end of gpfifo submission
1975 to keep track of method completion for idle railgating */ 1987 to keep track of method completion for idle railgating */
1976 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) 1988 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1977 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd, 1989 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1978 &post_fence, need_wfi); 1990 &post_fence, need_wfi, need_sync_fence);
1979 else 1991 else
1980 err = c->sync->incr(c->sync, &incr_cmd, 1992 err = c->sync->incr(c->sync, &incr_cmd,
1981 &post_fence); 1993 &post_fence, need_sync_fence);
1982 if (err) { 1994 if (err) {
1983 mutex_unlock(&c->submit_lock); 1995 mutex_unlock(&c->submit_lock);
1984 goto clean_up; 1996 goto clean_up;
@@ -2578,7 +2590,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
2578 2590
2579 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, 2591 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
2580 args->flags, &args->fence, 2592 args->flags, &args->fence,
2581 &fence_out); 2593 &fence_out, false);
2582 2594
2583 if (ret) 2595 if (ret)
2584 goto clean_up; 2596 goto clean_up;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 55528dd9..d3428788 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -244,7 +244,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
244 u32 num_entries, 244 u32 num_entries,
245 u32 flags, 245 u32 flags,
246 struct nvgpu_fence *fence, 246 struct nvgpu_fence *fence,
247 struct gk20a_fence **fence_out); 247 struct gk20a_fence **fence_out,
248 bool force_need_sync_fence);
248 249
249int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, 250int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
250 struct nvgpu_alloc_gpfifo_args *args); 251 struct nvgpu_alloc_gpfifo_args *args);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 95647774..c0c8ec6d 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -166,7 +166,8 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
166 bool wfi_cmd, 166 bool wfi_cmd,
167 bool register_irq, 167 bool register_irq,
168 struct priv_cmd_entry **entry, 168 struct priv_cmd_entry **entry,
169 struct gk20a_fence **fence) 169 struct gk20a_fence **fence,
170 bool need_sync_fence)
170{ 171{
171 u32 thresh; 172 u32 thresh;
172 int incr_cmd_size; 173 int incr_cmd_size;
@@ -239,7 +240,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
239 } 240 }
240 241
241 *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, 242 *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
242 wfi_cmd); 243 wfi_cmd, need_sync_fence);
243 *entry = incr_cmd; 244 *entry = incr_cmd;
244 return 0; 245 return 0;
245} 246}
@@ -251,33 +252,35 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
251 return __gk20a_channel_syncpt_incr(s, 252 return __gk20a_channel_syncpt_incr(s,
252 true /* wfi */, 253 true /* wfi */,
253 false /* no irq handler */, 254 false /* no irq handler */,
254 entry, fence); 255 entry, fence, true);
255} 256}
256 257
257static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, 258static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
258 struct priv_cmd_entry **entry, 259 struct priv_cmd_entry **entry,
259 struct gk20a_fence **fence) 260 struct gk20a_fence **fence,
261 bool need_sync_fence)
260{ 262{
261 /* Don't put wfi cmd to this one since we're not returning 263 /* Don't put wfi cmd to this one since we're not returning
262 * a fence to user space. */ 264 * a fence to user space. */
263 return __gk20a_channel_syncpt_incr(s, 265 return __gk20a_channel_syncpt_incr(s,
264 false /* no wfi */, 266 false /* no wfi */,
265 true /* register irq */, 267 true /* register irq */,
266 entry, fence); 268 entry, fence, need_sync_fence);
267} 269}
268 270
269static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s, 271static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
270 int wait_fence_fd, 272 int wait_fence_fd,
271 struct priv_cmd_entry **entry, 273 struct priv_cmd_entry **entry,
272 struct gk20a_fence **fence, 274 struct gk20a_fence **fence,
273 bool wfi) 275 bool wfi,
276 bool need_sync_fence)
274{ 277{
275 /* Need to do 'wfi + host incr' since we return the fence 278 /* Need to do 'wfi + host incr' since we return the fence
276 * to user space. */ 279 * to user space. */
277 return __gk20a_channel_syncpt_incr(s, 280 return __gk20a_channel_syncpt_incr(s,
278 wfi, 281 wfi,
279 true /* register irq */, 282 true /* register irq */,
280 entry, fence); 283 entry, fence, need_sync_fence);
281} 284}
282 285
283static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s) 286static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
@@ -513,7 +516,8 @@ static int __gk20a_channel_semaphore_incr(
513 struct gk20a_channel_sync *s, bool wfi_cmd, 516 struct gk20a_channel_sync *s, bool wfi_cmd,
514 struct sync_fence *dependency, 517 struct sync_fence *dependency,
515 struct priv_cmd_entry **entry, 518 struct priv_cmd_entry **entry,
516 struct gk20a_fence **fence) 519 struct gk20a_fence **fence,
520 bool need_sync_fence)
517{ 521{
518 u64 va; 522 u64 va;
519 int incr_cmd_size; 523 int incr_cmd_size;
@@ -560,18 +564,19 @@ static int gk20a_channel_semaphore_incr_wfi(
560 return __gk20a_channel_semaphore_incr(s, 564 return __gk20a_channel_semaphore_incr(s,
561 true /* wfi */, 565 true /* wfi */,
562 NULL, 566 NULL,
563 entry, fence); 567 entry, fence, true);
564} 568}
565 569
566static int gk20a_channel_semaphore_incr( 570static int gk20a_channel_semaphore_incr(
567 struct gk20a_channel_sync *s, 571 struct gk20a_channel_sync *s,
568 struct priv_cmd_entry **entry, 572 struct priv_cmd_entry **entry,
569 struct gk20a_fence **fence) 573 struct gk20a_fence **fence,
574 bool need_sync_fence)
570{ 575{
571 /* Don't put wfi cmd to this one since we're not returning 576 /* Don't put wfi cmd to this one since we're not returning
572 * a fence to user space. */ 577 * a fence to user space. */
573 return __gk20a_channel_semaphore_incr(s, false /* no wfi */, 578 return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
574 NULL, entry, fence); 579 NULL, entry, fence, need_sync_fence);
575} 580}
576 581
577static int gk20a_channel_semaphore_incr_user( 582static int gk20a_channel_semaphore_incr_user(
@@ -579,7 +584,8 @@ static int gk20a_channel_semaphore_incr_user(
579 int wait_fence_fd, 584 int wait_fence_fd,
580 struct priv_cmd_entry **entry, 585 struct priv_cmd_entry **entry,
581 struct gk20a_fence **fence, 586 struct gk20a_fence **fence,
582 bool wfi) 587 bool wfi,
588 bool need_sync_fence)
583{ 589{
584#ifdef CONFIG_SYNC 590#ifdef CONFIG_SYNC
585 struct sync_fence *dependency = NULL; 591 struct sync_fence *dependency = NULL;
@@ -592,7 +598,7 @@ static int gk20a_channel_semaphore_incr_user(
592 } 598 }
593 599
594 err = __gk20a_channel_semaphore_incr(s, wfi, dependency, 600 err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
595 entry, fence); 601 entry, fence, need_sync_fence);
596 if (err) { 602 if (err) {
597 if (dependency) 603 if (dependency)
598 sync_fence_put(dependency); 604 sync_fence_put(dependency);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index a347cbab..618e1b26 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Channel Synchronization Abstraction 4 * GK20A Channel Synchronization Abstraction
5 * 5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -54,7 +54,8 @@ struct gk20a_channel_sync {
54 */ 54 */
55 int (*incr)(struct gk20a_channel_sync *s, 55 int (*incr)(struct gk20a_channel_sync *s,
56 struct priv_cmd_entry **entry, 56 struct priv_cmd_entry **entry,
57 struct gk20a_fence **fence); 57 struct gk20a_fence **fence,
58 bool need_sync_fence);
58 59
59 /* Increment syncpoint/semaphore, preceded by a wfi. 60 /* Increment syncpoint/semaphore, preceded by a wfi.
60 * Returns 61 * Returns
@@ -76,7 +77,8 @@ struct gk20a_channel_sync {
76 int wait_fence_fd, 77 int wait_fence_fd,
77 struct priv_cmd_entry **entry, 78 struct priv_cmd_entry **entry,
78 struct gk20a_fence **fence, 79 struct gk20a_fence **fence,
79 bool wfi); 80 bool wfi,
81 bool need_sync_fence);
80 82
81 /* Reset the channel syncpoint/semaphore. */ 83 /* Reset the channel syncpoint/semaphore. */
82 void (*set_min_eq_max)(struct gk20a_channel_sync *s); 84 void (*set_min_eq_max)(struct gk20a_channel_sync *s);
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 54a288cd..ae19d36f 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -194,7 +194,8 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
194}; 194};
195 195
196struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, 196struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
197 u32 id, u32 value, bool wfi) 197 u32 id, u32 value, bool wfi,
198 bool need_sync_fence)
198{ 199{
199 struct gk20a_fence *f; 200 struct gk20a_fence *f;
200 struct sync_fence *sync_fence = NULL; 201 struct sync_fence *sync_fence = NULL;
@@ -205,10 +206,12 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
205 .thresh = value 206 .thresh = value
206 }; 207 };
207 208
208 sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1, 209 if (need_sync_fence) {
209 "fence"); 210 sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
210 if (IS_ERR(sync_fence)) 211 "fence");
211 return NULL; 212 if (IS_ERR(sync_fence))
213 return NULL;
214 }
212#endif 215#endif
213 216
214 f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi); 217 f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index 629dc694..75e135e9 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Fences 4 * GK20A Fences
5 * 5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -56,7 +56,8 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
56 56
57struct gk20a_fence *gk20a_fence_from_syncpt( 57struct gk20a_fence *gk20a_fence_from_syncpt(
58 struct platform_device *host1x_pdev, 58 struct platform_device *host1x_pdev,
59 u32 id, u32 value, bool wfi); 59 u32 id, u32 value, bool wfi,
60 bool need_sync_fence);
60 61
61/* Fence operations */ 62/* Fence operations */
62void gk20a_fence_put(struct gk20a_fence *f); 63void gk20a_fence_put(struct gk20a_fence *f);