From 7b8cbd2be37643e755ccb204e38c20b17aac28bd Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Thu, 28 Jul 2016 11:22:53 -0700 Subject: gpu: nvgpu: Greatly simplify the semaphore detection Greatly simplify and make more robust the gpu semaphore detection in sync_fences. Instead of using a magic number use the parent timeline of sync_pts. This will also work with multi-GPU setups using nvgpu since the timeline ops pointer will be the same across all instances of nvgpu. Bug 1732449 Reviewed-on: http://git-master/r/1203834 (cherry picked from commit 66eeb577eae5d10741fd15f3659e843c70792cd6) Change-Id: I4c6619d70b5531e2676e18d1330724e8f8b9bcb3 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1221042 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 19 +--- drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 124 +++++++++++---------------- drivers/gpu/nvgpu/gk20a/sync_gk20a.h | 9 +- 3 files changed, 51 insertions(+), 101 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 98363c88..0eff4e2f 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -510,24 +510,7 @@ static int gk20a_channel_semaphore_wait_syncpt( return -ENODEV; } -/* - * UGHHH - the sync_fence underlying implementation changes from 3.10 to 3.18. - * But since there's no API for getting the underlying sync_pts we have to do - * some conditional compilation. - */ #ifdef CONFIG_SYNC -static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) - struct sync_pt *pt; - - pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list); - return gk20a_sync_pt_inst_get_sema(pt); -#else - return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt); -#endif -} - /* * Attempt a fast path for waiting on a sync_fence. Basically if the passed * sync_fence is backed by a gk20a_semaphore then there's no reason to go @@ -551,7 +534,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, if (!gk20a_is_sema_backed_sync_fence(fence)) return -ENODEV; - sema = sema_from_sync_fence(fence); + sema = gk20a_sync_fence_get_sema(fence); /* * If there's no underlying sema then that means the underlying sema has diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index de30eed4..af6af70e 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c @@ -57,110 +57,85 @@ struct gk20a_sync_pt { struct gk20a_sync_pt_inst { struct sync_pt pt; - - /* - * Magic number to identify a gk20a_sync_pt_inst from either a struct - * fence or a struct sync_pt. - */ -#define GK20A_SYNC_PT_INST_MAGIC 0xb333eeef; - u32 magic; - struct gk20a_sync_pt *shared; }; -/** - * Check if a sync_pt is a gk20a_sync_pt_inst. - */ -int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt) -{ - struct gk20a_sync_pt_inst *pti = - container_of(pt, struct gk20a_sync_pt_inst, pt); - - return pti->magic == GK20A_SYNC_PT_INST_MAGIC; -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) -/** - * Check if a fence is a gk20a_sync_pt_inst. - */ -int gk20a_is_gk20a_sync_pt_inst(struct fence *f) -{ - struct sync_pt *pt = container_of(f, struct sync_pt, base); - - return __gk20a_is_gk20a_sync_pt_inst(pt); -} - -/** - * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the - * passed fence is in fact a gk20a_sync_pt_inst - use - * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function. - */ -struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f) -{ - struct sync_pt *pt = container_of(f, struct sync_pt, base); - struct gk20a_sync_pt_inst *pti = - container_of(pt, struct gk20a_sync_pt_inst, pt); - - BUG_ON(!gk20a_is_gk20a_sync_pt_inst(f)); - - return pti->shared->sema; -} -#else -/** - * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the - * passed sync_pt is in fact a gk20a_sync_pt_inst - use - * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function. - */ -struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt) -{ - struct gk20a_sync_pt_inst *pti; - - BUG_ON(!__gk20a_is_gk20a_sync_pt_inst(pt)); - pti = container_of(pt, struct gk20a_sync_pt_inst, pt); - - return pti->shared->sema; -} -#endif - /** * Check if the passed sync_fence is backed by a single GPU semaphore. In such * cases we can short circuit a lot of SW involved in signaling pre-fences and * post fences. + * + * For now reject multi-sync_pt fences. This could be changed in future. It + * would require that the sema fast path push a sema acquire for each semaphore + * in the fence. */ int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence) { + struct sync_timeline *t; + #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) - struct sync_pt *pt; + struct sync_pt *spt; int i = 0; if (list_empty(&fence->pt_list_head)) return 0; - /* - * For now reject multi-sync_pt fences. This could be changed in - * future. It would require that the sema fast path push a sema - * acquire for each semaphore in the fence. - */ - list_for_each_entry(pt, &fence->pt_list_head, pt_list) { + list_for_each_entry(spt, &fence->pt_list_head, pt_list) { i++; if (i >= 2) return 0; } - pt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list); - return __gk20a_is_gk20a_sync_pt_inst(pt); - + spt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list); + t = spt->parent; #else - struct sync_fence_cb *cb0 = &fence->cbs[0]; + struct fence *pt = fence->cbs[0].sync_pt; + struct sync_pt *spt = sync_pt_from_fence(pt); if (fence->num_fences != 1) return 0; - return gk20a_is_gk20a_sync_pt_inst(cb0->sync_pt); + if (spt == NULL) + return 0; + + t = sync_pt_parent(spt); #endif + + if (t->ops == &gk20a_sync_timeline_ops) + return 1; + return 0; } +struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f) +{ + struct sync_pt *spt; + struct gk20a_sync_pt_inst *pti; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) + if (!f) + return NULL; + + if (!gk20a_is_sema_backed_sync_fence(f)) + return NULL; + + spt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list); +#else + struct fence *pt; + + if (!f) + return NULL; + + if (!gk20a_is_sema_backed_sync_fence(f)) + return NULL; + + pt = f->cbs[0].sync_pt; + spt = sync_pt_from_fence(pt); +#endif + pti = container_of(spt, struct gk20a_sync_pt_inst, pt); + + return pti->shared->sema; +} /** * Compares sync pt values a and b, both of which will trigger either before @@ -283,7 +258,6 @@ static struct sync_pt *gk20a_sync_pt_create_inst( if (!pti) return NULL; - pti->magic = GK20A_SYNC_PT_INST_MAGIC; pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); if (!pti->shared) { sync_pt_free(&pti->pt); diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h index f885febd..e5b31471 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h @@ -27,15 +27,8 @@ struct sync_pt; struct gk20a_semaphore; struct fence; -int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt); -int gk20a_is_gk20a_sync_pt_inst(struct fence *f); int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence); - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) -struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f); -#else -struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt); -#endif +struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f); #ifdef CONFIG_SYNC struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); -- cgit v1.2.2