summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
diff options
context:
space:
mode:
author: Konsta Holtta <kholtta@nvidia.com> 2018-03-13 10:58:01 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-03-16 10:34:01 -0400
commit: 34323b559590ed8f1c64ecbb7ffbd838a6478594 (patch)
tree: c6258e44413a2f15ded4cf63e4a16f5118921703 /drivers/gpu/nvgpu/gk20a/sync_gk20a.h
parent: fb40f2a80739985abac273bc493e07341aa003af (diff)
gpu: nvgpu: wait for all prefence semas on gpu
The pre-fence wait for semaphores in the submit path has supported a fast path for fences that have only one underlying semaphore. The fast path just inserts the wait on this sema to the pushbuffer directly. For other fences, the path has been using a CPU wait indirection, signaling another semaphore when we get the CPU-side callback.

Instead of only supporting prefences with one sema, unroll all the individual semaphores and insert waits for each to a pushbuffer, like we've already been doing with syncpoints. Now all sema-backed syncs get the fast path. This simplifies the logic and makes it more explicit that only foreign fences need the CPU wait.

There is no need to hold references to the sync fence or the semas inside: this submitted job only needs the global read-only sema mapping that is guaranteed to stay alive while the VM of this channel stays alive, and the job does not outlive this channel.

Jira NVGPU-43
Jira NVGPU-66
Jira NVGPU-513

Change-Id: I7cfbb510001d998a864aed8d6afd1582b9adb80d
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1636345
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.h')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/sync_gk20a.h | 6
1 files changed, 2 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index 7d7aff6d..8a6439ab 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Sync Framework Integration 4 * GK20A Sync Framework Integration
5 * 5 *
6 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a 8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"), 9 * copy of this software and associated documentation files (the "Software"),
@@ -33,9 +33,6 @@ struct sync_pt;
33struct nvgpu_semaphore; 33struct nvgpu_semaphore;
34struct fence; 34struct fence;
35 35
36int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence);
37struct nvgpu_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f);
38
39#ifdef CONFIG_SYNC 36#ifdef CONFIG_SYNC
40struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); 37struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...);
41void gk20a_sync_timeline_destroy(struct sync_timeline *); 38void gk20a_sync_timeline_destroy(struct sync_timeline *);
@@ -46,6 +43,7 @@ struct sync_fence *gk20a_sync_fence_create(
46 struct nvgpu_semaphore *, 43 struct nvgpu_semaphore *,
47 const char *fmt, ...); 44 const char *fmt, ...);
48struct sync_fence *gk20a_sync_fence_fdget(int fd); 45struct sync_fence *gk20a_sync_fence_fdget(int fd);
46struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt);
49#else 47#else
50static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {} 48static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {}
51static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {} 49static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {}