From 52753b51f1dbf51221d7856a9288aad1ab2d351a Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Wed, 7 Oct 2015 16:20:07 +0530
Subject: gpu: nvgpu: create sync_fence only if needed

Currently, we create a sync_fence (via nvhost_sync_create_fence()) for
every submit, but not all submits request a sync_fence. The
nvhost_sync_create_fence() call also accounts for about a third of the
total submit path.

Hence, as an optimization, allocate a sync_fence only when userspace
explicitly asks for it by setting both
NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET and
NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE.

Also, the CDE path from gk20a_prepare_compressible_read() reuses the
existing fence stored in "state", which can result in no sync_fence_fd
being returned even though the user asked for one. Hence, force
allocation of the sync_fence when the job submission comes from the
CDE path.

Bug 200141116

Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e
Signed-off-by: Deepak Nibade
Reviewed-on: http://git-master/r/812845
(cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98)
Reviewed-on: http://git-master/r/837662
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam
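
In effect, the submit path below now decides whether to allocate a
sync_fence according to this sketch (a distillation of the diff; the
flag values shown are illustrative, the real definitions live in the
nvgpu UAPI header):

	#include <stdbool.h>
	#include <stdint.h>

	/* Illustrative values; see the nvgpu UAPI header for the real ones. */
	#define NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET	(1u << 1)
	#define NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE	(1u << 3)

	/*
	 * A sync_fence is allocated only when the caller forces it (the
	 * CDE path passes force_need_sync_fence = true) or when userspace
	 * asks for a post-fence back (FENCE_GET) and wants it delivered as
	 * a sync_fence_fd (SYNC_FENCE) rather than as a raw syncpoint
	 * id/value pair.
	 */
	static bool submit_needs_sync_fence(uint32_t flags,
					    bool force_need_sync_fence)
	{
		if (force_need_sync_fence)
			return true;

		return (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
		       (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE);
	}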
---
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c          |  2 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      | 20 +++++++++++++----
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h      |  3 ++-
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 32 +++++++++++++++++-----------
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h |  8 ++++---
 drivers/gpu/nvgpu/gk20a/fence_gk20a.c        | 13 ++++++-----
 drivers/gpu/nvgpu/gk20a/fence_gk20a.h        |  5 +++--
 7 files changed, 54 insertions(+), 29 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a')

diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index e1edec2a..a2f7e7a4 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -724,7 +724,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
 	}
 
 	return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
-					   num_entries, flags, fence, fence_out);
+					   num_entries, flags, fence, fence_out, true);
 }
 
 static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 59c3e31d..98c8760e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1830,7 +1830,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_fence *fence,
-				struct gk20a_fence **fence_out)
+				struct gk20a_fence **fence_out,
+				bool force_need_sync_fence)
 {
 	struct gk20a *g = c->g;
 	struct device *d = dev_from_gk20a(g);
@@ -1848,6 +1849,14 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
 	bool skip_buffer_refcounting = (flags &
 			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
+	bool need_sync_fence = false;
+
+	/*
+	 * If user wants to allocate sync_fence_fd always, then respect that;
+	 * otherwise, allocate sync_fence_fd based on user flags only
+	 */
+	if (force_need_sync_fence)
+		need_sync_fence = true;
 
 	if (c->has_timedout)
 		return -ETIMEDOUT;
@@ -1970,15 +1979,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		goto clean_up;
 	}
 
+	if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
+			(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
+		need_sync_fence = true;
 	/* always insert syncpt increment at end of gpfifo submission
 	   to keep track of method completion for idle railgating */
 	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
 		err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
-				&post_fence, need_wfi);
+				&post_fence, need_wfi, need_sync_fence);
 	else
 		err = c->sync->incr(c->sync, &incr_cmd,
-				&post_fence);
+				&post_fence, need_sync_fence);
 	if (err) {
 		mutex_unlock(&c->submit_lock);
 		goto clean_up;
 	}
@@ -2578,7 +2590,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 
 	ret = gk20a_submit_channel_gpfifo(ch, NULL, args,
 					  args->num_entries,
 					  args->flags, &args->fence,
-					  &fence_out);
+					  &fence_out, false);
 	if (ret)
 		goto clean_up;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 55528dd9..d3428788 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -244,7 +244,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_fence *fence,
-				struct gk20a_fence **fence_out);
+				struct gk20a_fence **fence_out,
+				bool force_need_sync_fence);
 
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 			       struct nvgpu_alloc_gpfifo_args *args);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 95647774..c0c8ec6d 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -166,7 +166,8 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 				       bool wfi_cmd,
 				       bool register_irq,
 				       struct priv_cmd_entry **entry,
-				       struct gk20a_fence **fence)
+				       struct gk20a_fence **fence,
+				       bool need_sync_fence)
 {
 	u32 thresh;
 	int incr_cmd_size;
@@ -239,7 +240,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 	}
 
 	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
-					 wfi_cmd);
+					 wfi_cmd, need_sync_fence);
 	*entry = incr_cmd;
 	return 0;
 }
@@ -251,33 +252,35 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
 	return __gk20a_channel_syncpt_incr(s,
 			true /* wfi */,
 			false /* no irq handler */,
-			entry, fence);
+			entry, fence, true);
 }
 
 static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 			      struct priv_cmd_entry **entry,
-			      struct gk20a_fence **fence)
+			      struct gk20a_fence **fence,
+			      bool need_sync_fence)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
 	return __gk20a_channel_syncpt_incr(s,
 			false /* no wfi */,
 			true /* register irq */,
-			entry, fence);
+			entry, fence, need_sync_fence);
 }
 
 static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
 				  int wait_fence_fd,
 				  struct priv_cmd_entry **entry,
 				  struct gk20a_fence **fence,
-				  bool wfi)
+				  bool wfi,
+				  bool need_sync_fence)
 {
 	/* Need to do 'wfi + host incr' since we return the fence
 	 * to user space.
 	 */
 	return __gk20a_channel_syncpt_incr(s,
 			wfi,
 			true /* register irq */,
-			entry, fence);
+			entry, fence, need_sync_fence);
 }
 
 static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
@@ -513,7 +516,8 @@ static int __gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s, bool wfi_cmd,
 		struct sync_fence *dependency,
 		struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence)
+		struct gk20a_fence **fence,
+		bool need_sync_fence)
 {
 	u64 va;
 	int incr_cmd_size;
@@ -560,18 +564,19 @@ static int gk20a_channel_semaphore_incr_wfi(
 	return __gk20a_channel_semaphore_incr(s,
 			true /* wfi */,
 			NULL,
-			entry, fence);
+			entry, fence, true);
 }
 
 static int gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s,
 		struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence)
+		struct gk20a_fence **fence,
+		bool need_sync_fence)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
 	return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
-			NULL, entry, fence);
+			NULL, entry, fence, need_sync_fence);
 }
 
 static int gk20a_channel_semaphore_incr_user(
@@ -579,7 +584,8 @@ static int gk20a_channel_semaphore_incr_user(
 		int wait_fence_fd,
 		struct priv_cmd_entry **entry,
 		struct gk20a_fence **fence,
-		bool wfi)
+		bool wfi,
+		bool need_sync_fence)
 {
 #ifdef CONFIG_SYNC
 	struct sync_fence *dependency = NULL;
@@ -592,7 +598,7 @@ static int gk20a_channel_semaphore_incr_user(
 	}
 
 	err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
-					     entry, fence);
+					     entry, fence, need_sync_fence);
 	if (err) {
 		if (dependency)
 			sync_fence_put(dependency);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index a347cbab..618e1b26 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -3,7 +3,7 @@
  *
  * GK20A Channel Synchronization Abstraction
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -54,7 +54,8 @@ struct gk20a_channel_sync {
 	 */
 	int (*incr)(struct gk20a_channel_sync *s,
 		    struct priv_cmd_entry **entry,
-		    struct gk20a_fence **fence);
+		    struct gk20a_fence **fence,
+		    bool need_sync_fence);
 
 	/* Increment syncpoint/semaphore, preceded by a wfi.
 	 * Returns
@@ -76,7 +77,8 @@ struct gk20a_channel_sync {
 			 int wait_fence_fd,
 			 struct priv_cmd_entry **entry,
 			 struct gk20a_fence **fence,
-			 bool wfi);
+			 bool wfi,
+			 bool need_sync_fence);
 
 	/* Reset the channel syncpoint/semaphore.
 	 */
 	void (*set_min_eq_max)(struct gk20a_channel_sync *s);
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 54a288cd..ae19d36f 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -194,7 +194,8 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
 };
 
 struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
-					    u32 id, u32 value, bool wfi)
+					    u32 id, u32 value, bool wfi,
+					    bool need_sync_fence)
 {
 	struct gk20a_fence *f;
 	struct sync_fence *sync_fence = NULL;
@@ -205,10 +206,12 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
 		.thresh = value
 	};
 
-	sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
-					      "fence");
-	if (IS_ERR(sync_fence))
-		return NULL;
+	if (need_sync_fence) {
+		sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
+						      "fence");
+		if (IS_ERR(sync_fence))
+			return NULL;
+	}
 #endif
 
 	f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index 629dc694..75e135e9 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -3,7 +3,7 @@
  *
  * GK20A Fences
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -56,7 +56,8 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
 
 struct gk20a_fence *gk20a_fence_from_syncpt(
 		struct platform_device *host1x_pdev,
-		u32 id, u32 value, bool wfi);
+		u32 id, u32 value, bool wfi,
+		bool need_sync_fence);
 
 /* Fence operations */
 void gk20a_fence_put(struct gk20a_fence *f);
--
cgit v1.2.2
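
For reference, a user-space submission that actually gets a
sync_fence_fd back would look roughly like the sketch below. This is an
illustration against the nvgpu UAPI of this era, not part of the change:
the header name, struct layout, and the convention that the installed fd
comes back in args.fence.id are assumptions to verify against the
matching linux/nvgpu.h.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/nvgpu.h>	/* assumed UAPI header for this tree */

	/*
	 * Submit 'num_entries' gpfifo entries on an open channel fd and
	 * request the post-fence as an Android sync_fence fd by setting
	 * both FENCE_GET and SYNC_FENCE. Returns the fd, or -1 on error.
	 */
	static int submit_and_get_sync_fd(int ch_fd, uint64_t gpfifo_va,
					  uint32_t num_entries)
	{
		struct nvgpu_submit_gpfifo_args args = {
			.gpfifo = gpfifo_va,
			.num_entries = num_entries,
			.flags = NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET |
				 NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE,
		};

		if (ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO, &args) < 0)
			return -1;

		/* assumed: fence.id carries the sync_fence_fd on return */
		return (int)args.fence.id;
	}

Without FLAGS_SYNC_FENCE, the same submit would return a raw syncpoint
id/value pair in args.fence instead, and no sync_fence would be
allocated in the kernel, which is exactly the cost this patch avoids.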