diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-07-28 14:22:53 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-16 00:58:37 -0400 |
commit | 7b8cbd2be37643e755ccb204e38c20b17aac28bd (patch) | |
tree | 9605958d7a7de7762276c8f1c3be5e44dc2567b4 /drivers/gpu/nvgpu | |
parent | 9bd76b7fa08672181c2a0cce747a59664e226964 (diff) |
gpu: nvgpu: Greatly simplify the semaphore detection
Greatly simplify the GPU semaphore detection in sync_fences and make
it more robust. Instead of using a magic number, use the parent
timeline of the sync_pts.
This will also work with multi-GPU setups using nvgpu since the
timeline ops pointer will be the same across all instances of
nvgpu.
Bug 1732449
Reviewed-on: http://git-master/r/1203834
(cherry picked from commit 66eeb577eae5d10741fd15f3659e843c70792cd6)
Change-Id: I4c6619d70b5531e2676e18d1330724e8f8b9bcb3
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1221042
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 124 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.h | 9 |
3 files changed, 51 insertions, 101 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 98363c88..0eff4e2f 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -510,24 +510,7 @@ static int gk20a_channel_semaphore_wait_syncpt( | |||
510 | return -ENODEV; | 510 | return -ENODEV; |
511 | } | 511 | } |
512 | 512 | ||
513 | /* | ||
514 | * UGHHH - the sync_fence underlying implementation changes from 3.10 to 3.18. | ||
515 | * But since there's no API for getting the underlying sync_pts we have to do | ||
516 | * some conditional compilation. | ||
517 | */ | ||
518 | #ifdef CONFIG_SYNC | 513 | #ifdef CONFIG_SYNC |
519 | static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f) | ||
520 | { | ||
521 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
522 | struct sync_pt *pt; | ||
523 | |||
524 | pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list); | ||
525 | return gk20a_sync_pt_inst_get_sema(pt); | ||
526 | #else | ||
527 | return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt); | ||
528 | #endif | ||
529 | } | ||
530 | |||
531 | /* | 514 | /* |
532 | * Attempt a fast path for waiting on a sync_fence. Basically if the passed | 515 | * Attempt a fast path for waiting on a sync_fence. Basically if the passed |
533 | * sync_fence is backed by a gk20a_semaphore then there's no reason to go | 516 | * sync_fence is backed by a gk20a_semaphore then there's no reason to go |
@@ -551,7 +534,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, | |||
551 | if (!gk20a_is_sema_backed_sync_fence(fence)) | 534 | if (!gk20a_is_sema_backed_sync_fence(fence)) |
552 | return -ENODEV; | 535 | return -ENODEV; |
553 | 536 | ||
554 | sema = sema_from_sync_fence(fence); | 537 | sema = gk20a_sync_fence_get_sema(fence); |
555 | 538 | ||
556 | /* | 539 | /* |
557 | * If there's no underlying sema then that means the underlying sema has | 540 | * If there's no underlying sema then that means the underlying sema has |
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index de30eed4..af6af70e 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |||
@@ -57,110 +57,85 @@ struct gk20a_sync_pt { | |||
57 | 57 | ||
58 | struct gk20a_sync_pt_inst { | 58 | struct gk20a_sync_pt_inst { |
59 | struct sync_pt pt; | 59 | struct sync_pt pt; |
60 | |||
61 | /* | ||
62 | * Magic number to identify a gk20a_sync_pt_inst from either a struct | ||
63 | * fence or a struct sync_pt. | ||
64 | */ | ||
65 | #define GK20A_SYNC_PT_INST_MAGIC 0xb333eeef; | ||
66 | u32 magic; | ||
67 | |||
68 | struct gk20a_sync_pt *shared; | 60 | struct gk20a_sync_pt *shared; |
69 | }; | 61 | }; |
70 | 62 | ||
71 | /** | 63 | /** |
72 | * Check if a sync_pt is a gk20a_sync_pt_inst. | ||
73 | */ | ||
74 | int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt) | ||
75 | { | ||
76 | struct gk20a_sync_pt_inst *pti = | ||
77 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
78 | |||
79 | return pti->magic == GK20A_SYNC_PT_INST_MAGIC; | ||
80 | } | ||
81 | |||
82 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) | ||
83 | /** | ||
84 | * Check if a fence is a gk20a_sync_pt_inst. | ||
85 | */ | ||
86 | int gk20a_is_gk20a_sync_pt_inst(struct fence *f) | ||
87 | { | ||
88 | struct sync_pt *pt = container_of(f, struct sync_pt, base); | ||
89 | |||
90 | return __gk20a_is_gk20a_sync_pt_inst(pt); | ||
91 | } | ||
92 | |||
93 | /** | ||
94 | * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the | ||
95 | * passed fence is in fact a gk20a_sync_pt_inst - use | ||
96 | * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function. | ||
97 | */ | ||
98 | struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f) | ||
99 | { | ||
100 | struct sync_pt *pt = container_of(f, struct sync_pt, base); | ||
101 | struct gk20a_sync_pt_inst *pti = | ||
102 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
103 | |||
104 | BUG_ON(!gk20a_is_gk20a_sync_pt_inst(f)); | ||
105 | |||
106 | return pti->shared->sema; | ||
107 | } | ||
108 | #else | ||
109 | /** | ||
110 | * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the | ||
111 | * passed sync_pt is in fact a gk20a_sync_pt_inst - use | ||
112 | * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function. | ||
113 | */ | ||
114 | struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt) | ||
115 | { | ||
116 | struct gk20a_sync_pt_inst *pti; | ||
117 | |||
118 | BUG_ON(!__gk20a_is_gk20a_sync_pt_inst(pt)); | ||
119 | pti = container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
120 | |||
121 | return pti->shared->sema; | ||
122 | } | ||
123 | #endif | ||
124 | |||
125 | /** | ||
126 | * Check if the passed sync_fence is backed by a single GPU semaphore. In such | 64 | * Check if the passed sync_fence is backed by a single GPU semaphore. In such |
127 | * cases we can short circuit a lot of SW involved in signaling pre-fences and | 65 | * cases we can short circuit a lot of SW involved in signaling pre-fences and |
128 | * post fences. | 66 | * post fences. |
67 | * | ||
68 | * For now reject multi-sync_pt fences. This could be changed in future. It | ||
69 | * would require that the sema fast path push a sema acquire for each semaphore | ||
70 | * in the fence. | ||
129 | */ | 71 | */ |
130 | int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence) | 72 | int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence) |
131 | { | 73 | { |
74 | struct sync_timeline *t; | ||
75 | |||
132 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | 76 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) |
133 | struct sync_pt *pt; | 77 | struct sync_pt *spt; |
134 | int i = 0; | 78 | int i = 0; |
135 | 79 | ||
136 | if (list_empty(&fence->pt_list_head)) | 80 | if (list_empty(&fence->pt_list_head)) |
137 | return 0; | 81 | return 0; |
138 | 82 | ||
139 | /* | 83 | list_for_each_entry(spt, &fence->pt_list_head, pt_list) { |
140 | * For now reject multi-sync_pt fences. This could be changed in | ||
141 | * future. It would require that the sema fast path push a sema | ||
142 | * acquire for each semaphore in the fence. | ||
143 | */ | ||
144 | list_for_each_entry(pt, &fence->pt_list_head, pt_list) { | ||
145 | i++; | 84 | i++; |
146 | 85 | ||
147 | if (i >= 2) | 86 | if (i >= 2) |
148 | return 0; | 87 | return 0; |
149 | } | 88 | } |
150 | 89 | ||
151 | pt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list); | 90 | spt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list); |
152 | return __gk20a_is_gk20a_sync_pt_inst(pt); | 91 | t = spt->parent; |
153 | |||
154 | #else | 92 | #else |
155 | struct sync_fence_cb *cb0 = &fence->cbs[0]; | 93 | struct fence *pt = fence->cbs[0].sync_pt; |
94 | struct sync_pt *spt = sync_pt_from_fence(pt); | ||
156 | 95 | ||
157 | if (fence->num_fences != 1) | 96 | if (fence->num_fences != 1) |
158 | return 0; | 97 | return 0; |
159 | 98 | ||
160 | return gk20a_is_gk20a_sync_pt_inst(cb0->sync_pt); | 99 | if (spt == NULL) |
100 | return 0; | ||
101 | |||
102 | t = sync_pt_parent(spt); | ||
161 | #endif | 103 | #endif |
104 | |||
105 | if (t->ops == &gk20a_sync_timeline_ops) | ||
106 | return 1; | ||
107 | return 0; | ||
162 | } | 108 | } |
163 | 109 | ||
110 | struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f) | ||
111 | { | ||
112 | struct sync_pt *spt; | ||
113 | struct gk20a_sync_pt_inst *pti; | ||
114 | |||
115 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
116 | if (!f) | ||
117 | return NULL; | ||
118 | |||
119 | if (!gk20a_is_sema_backed_sync_fence(f)) | ||
120 | return NULL; | ||
121 | |||
122 | spt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list); | ||
123 | #else | ||
124 | struct fence *pt; | ||
125 | |||
126 | if (!f) | ||
127 | return NULL; | ||
128 | |||
129 | if (!gk20a_is_sema_backed_sync_fence(f)) | ||
130 | return NULL; | ||
131 | |||
132 | pt = f->cbs[0].sync_pt; | ||
133 | spt = sync_pt_from_fence(pt); | ||
134 | #endif | ||
135 | pti = container_of(spt, struct gk20a_sync_pt_inst, pt); | ||
136 | |||
137 | return pti->shared->sema; | ||
138 | } | ||
164 | 139 | ||
165 | /** | 140 | /** |
166 | * Compares sync pt values a and b, both of which will trigger either before | 141 | * Compares sync pt values a and b, both of which will trigger either before |
@@ -283,7 +258,6 @@ static struct sync_pt *gk20a_sync_pt_create_inst( | |||
283 | if (!pti) | 258 | if (!pti) |
284 | return NULL; | 259 | return NULL; |
285 | 260 | ||
286 | pti->magic = GK20A_SYNC_PT_INST_MAGIC; | ||
287 | pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); | 261 | pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); |
288 | if (!pti->shared) { | 262 | if (!pti->shared) { |
289 | sync_pt_free(&pti->pt); | 263 | sync_pt_free(&pti->pt); |
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h index f885febd..e5b31471 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h | |||
@@ -27,15 +27,8 @@ struct sync_pt; | |||
27 | struct gk20a_semaphore; | 27 | struct gk20a_semaphore; |
28 | struct fence; | 28 | struct fence; |
29 | 29 | ||
30 | int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt); | ||
31 | int gk20a_is_gk20a_sync_pt_inst(struct fence *f); | ||
32 | int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence); | 30 | int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence); |
33 | 31 | struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f); | |
34 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) | ||
35 | struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f); | ||
36 | #else | ||
37 | struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt); | ||
38 | #endif | ||
39 | 32 | ||
40 | #ifdef CONFIG_SYNC | 33 | #ifdef CONFIG_SYNC |
41 | struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); | 34 | struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); |