From 7b8cbd2be37643e755ccb204e38c20b17aac28bd Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Thu, 28 Jul 2016 11:22:53 -0700 Subject: gpu: nvgpu: Greatly simplify the semaphore detection Greatly simplify and make more robust the gpu semaphore detection in sync_fences. Instead of using a magic number use the parent timeline of sync_pts. This will also work with multi-GPU setups using nvgpu since the timeline ops pointer will be the same across all instances of nvgpu. Bug 1732449 Reviewed-on: http://git-master/r/1203834 (cherry picked from commit 66eeb577eae5d10741fd15f3659e843c70792cd6) Change-Id: I4c6619d70b5531e2676e18d1330724e8f8b9bcb3 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1221042 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 124 ++++++++++++++--------------------- 1 file changed, 49 insertions(+), 75 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c') diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index de30eed4..af6af70e 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c @@ -57,110 +57,85 @@ struct gk20a_sync_pt { struct gk20a_sync_pt_inst { struct sync_pt pt; - - /* - * Magic number to identify a gk20a_sync_pt_inst from either a struct - * fence or a struct sync_pt. - */ -#define GK20A_SYNC_PT_INST_MAGIC 0xb333eeef; - u32 magic; - struct gk20a_sync_pt *shared; }; -/** - * Check if a sync_pt is a gk20a_sync_pt_inst. - */ -int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt) -{ - struct gk20a_sync_pt_inst *pti = - container_of(pt, struct gk20a_sync_pt_inst, pt); - - return pti->magic == GK20A_SYNC_PT_INST_MAGIC; -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) -/** - * Check if a fence is a gk20a_sync_pt_inst. - */ -int gk20a_is_gk20a_sync_pt_inst(struct fence *f) -{ - struct sync_pt *pt = container_of(f, struct sync_pt, base); - - return __gk20a_is_gk20a_sync_pt_inst(pt); -} - -/** - * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the - * passed fence is in fact a gk20a_sync_pt_inst - use - * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function. - */ -struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f) -{ - struct sync_pt *pt = container_of(f, struct sync_pt, base); - struct gk20a_sync_pt_inst *pti = - container_of(pt, struct gk20a_sync_pt_inst, pt); - - BUG_ON(!gk20a_is_gk20a_sync_pt_inst(f)); - - return pti->shared->sema; -} -#else -/** - * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the - * passed sync_pt is in fact a gk20a_sync_pt_inst - use - * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function. - */ -struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt) -{ - struct gk20a_sync_pt_inst *pti; - - BUG_ON(!__gk20a_is_gk20a_sync_pt_inst(pt)); - pti = container_of(pt, struct gk20a_sync_pt_inst, pt); - - return pti->shared->sema; -} -#endif - /** * Check if the passed sync_fence is backed by a single GPU semaphore. In such * cases we can short circuit a lot of SW involved in signaling pre-fences and * post fences. + * + * For now reject multi-sync_pt fences. This could be changed in future. It + * would require that the sema fast path push a sema acquire for each semaphore + * in the fence. */ int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence) { + struct sync_timeline *t; + #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) - struct sync_pt *pt; + struct sync_pt *spt; int i = 0; if (list_empty(&fence->pt_list_head)) return 0; - /* - * For now reject multi-sync_pt fences. This could be changed in - * future. It would require that the sema fast path push a sema - * acquire for each semaphore in the fence. - */ - list_for_each_entry(pt, &fence->pt_list_head, pt_list) { + list_for_each_entry(spt, &fence->pt_list_head, pt_list) { i++; if (i >= 2) return 0; } - pt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list); - return __gk20a_is_gk20a_sync_pt_inst(pt); - + spt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list); + t = spt->parent; #else - struct sync_fence_cb *cb0 = &fence->cbs[0]; + struct fence *pt = fence->cbs[0].sync_pt; + struct sync_pt *spt = sync_pt_from_fence(pt); if (fence->num_fences != 1) return 0; - return gk20a_is_gk20a_sync_pt_inst(cb0->sync_pt); + if (spt == NULL) + return 0; + + t = sync_pt_parent(spt); #endif + + if (t->ops == &gk20a_sync_timeline_ops) + return 1; + return 0; } +struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f) +{ + struct sync_pt *spt; + struct gk20a_sync_pt_inst *pti; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) + if (!f) + return NULL; + + if (!gk20a_is_sema_backed_sync_fence(f)) + return NULL; + + spt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list); +#else + struct fence *pt; + + if (!f) + return NULL; + + if (!gk20a_is_sema_backed_sync_fence(f)) + return NULL; + + pt = f->cbs[0].sync_pt; + spt = sync_pt_from_fence(pt); +#endif + pti = container_of(spt, struct gk20a_sync_pt_inst, pt); + + return pti->shared->sema; +} /** * Compares sync pt values a and b, both of which will trigger either before @@ -283,7 +258,6 @@ static struct sync_pt *gk20a_sync_pt_create_inst( if (!pti) return NULL; - pti->magic = GK20A_SYNC_PT_INST_MAGIC; pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); if (!pti->shared) { sync_pt_free(&pti->pt); -- cgit v1.2.2