diff options
Diffstat (limited to 'include/os/linux/sync_sema_android.c')
-rw-r--r-- | include/os/linux/sync_sema_android.c | 418 |
1 files changed, 0 insertions, 418 deletions
diff --git a/include/os/linux/sync_sema_android.c b/include/os/linux/sync_sema_android.c deleted file mode 100644 index 59e3b7a..0000000 --- a/include/os/linux/sync_sema_android.c +++ /dev/null | |||
@@ -1,418 +0,0 @@ | |||
1 | /* | ||
2 | * Semaphore Sync Framework Integration | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/hrtimer.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | |||
24 | #include <nvgpu/kmem.h> | ||
25 | #include <nvgpu/semaphore.h> | ||
26 | #include <nvgpu/bug.h> | ||
27 | #include <nvgpu/kref.h> | ||
28 | #include <nvgpu/channel.h> | ||
29 | #include "../linux/channel.h" | ||
30 | |||
31 | #include "../drivers/staging/android/sync.h" | ||
32 | |||
33 | #include "sync_sema_android.h" | ||
34 | |||
35 | static const struct sync_timeline_ops gk20a_sync_timeline_ops; | ||
36 | |||
37 | struct gk20a_sync_timeline { | ||
38 | struct sync_timeline obj; | ||
39 | u32 max; | ||
40 | u32 min; | ||
41 | }; | ||
42 | |||
43 | /** | ||
44 | * The sync framework dups pts when merging fences. We share a single | ||
45 | * refcounted gk20a_sync_pt for each duped pt. | ||
46 | */ | ||
47 | struct gk20a_sync_pt { | ||
48 | struct gk20a *g; | ||
49 | struct nvgpu_ref refcount; | ||
50 | u32 thresh; | ||
51 | struct nvgpu_semaphore *sema; | ||
52 | struct gk20a_sync_timeline *obj; | ||
53 | |||
54 | /* | ||
55 | * Use a spin lock here since it will have better performance | ||
56 | * than a mutex - there should be very little contention on this | ||
57 | * lock. | ||
58 | */ | ||
59 | struct nvgpu_spinlock lock; | ||
60 | }; | ||
61 | |||
62 | struct gk20a_sync_pt_inst { | ||
63 | struct sync_pt pt; | ||
64 | struct gk20a_sync_pt *shared; | ||
65 | }; | ||
66 | |||
/**
 * Orders two sync pt values relative to a reference value.
 *
 * Both a and b must trigger on the same side of ref (both before it, or both
 * after it). Supplying ref is what lets the comparison handle wrapping.
 *
 * Returns -1 if a < b (a triggers before b)
 *          0 if a = b (a and b trigger at the same time)
 *          1 if a > b (b triggers before a)
 */
static int __gk20a_sync_pt_compare_ref(
		u32 ref,
		u32 a,
		u32 b)
{
	/*
	 * Both inputs are normalized by subtracting ref; call the results
	 * a_n and b_n. Because of wrapping, either one may be "negative"
	 * when viewed as a signed quantity.
	 *
	 * For the normalized values:
	 *  - a positive value triggers before a negative value
	 *  - a smaller positive value triggers before a greater positive one
	 *  - a smaller negative value (greater in absolute value) triggers
	 *    before a greater negative value (smaller in absolute value).
	 *
	 * That ordering is exactly the unsigned ordering of (u32)a_n and
	 * (u32)b_n, so plain unsigned arithmetic is sufficient.
	 *
	 * The possible layouts:
	 *
	 * 1A) ...ref..a....b....
	 * 1B) ...ref..b....a....
	 * 2A) ...b....ref..a....              b_n < 0
	 * 2B) ...a....ref..b....     a_n < 0
	 * 3A) ...a....b....ref..     a_n < 0, b_n < 0
	 * 3B) ...b....a....ref..     a_n < 0, b_n < 0
	 */
	const u32 a_n = a - ref;
	const u32 b_n = b - ref;

	/* Yields -1, 0 or 1 without branching. */
	return (a_n > b_n) - (a_n < b_n);
}
113 | |||
114 | static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt) | ||
115 | { | ||
116 | struct gk20a_sync_pt_inst *pti = | ||
117 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
118 | return pti->shared; | ||
119 | } | ||
120 | static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj) | ||
121 | { | ||
122 | if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops)) | ||
123 | return NULL; | ||
124 | return (struct gk20a_sync_timeline *)obj; | ||
125 | } | ||
126 | |||
127 | static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) | ||
128 | { | ||
129 | struct gk20a_sync_pt *pt = | ||
130 | container_of(ref, struct gk20a_sync_pt, refcount); | ||
131 | struct gk20a *g = pt->g; | ||
132 | |||
133 | if (pt->sema) | ||
134 | nvgpu_semaphore_put(pt->sema); | ||
135 | nvgpu_kfree(g, pt); | ||
136 | } | ||
137 | |||
138 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | ||
139 | struct gk20a *g, | ||
140 | struct gk20a_sync_timeline *obj, | ||
141 | struct nvgpu_semaphore *sema) | ||
142 | { | ||
143 | struct gk20a_sync_pt *shared; | ||
144 | |||
145 | shared = nvgpu_kzalloc(g, sizeof(*shared)); | ||
146 | if (!shared) | ||
147 | return NULL; | ||
148 | |||
149 | nvgpu_ref_init(&shared->refcount); | ||
150 | shared->g = g; | ||
151 | shared->obj = obj; | ||
152 | shared->sema = sema; | ||
153 | shared->thresh = ++obj->max; /* sync framework has a lock */ | ||
154 | |||
155 | nvgpu_spinlock_init(&shared->lock); | ||
156 | |||
157 | nvgpu_semaphore_get(sema); | ||
158 | |||
159 | return shared; | ||
160 | } | ||
161 | |||
162 | static struct sync_pt *gk20a_sync_pt_create_inst( | ||
163 | struct gk20a *g, | ||
164 | struct gk20a_sync_timeline *obj, | ||
165 | struct nvgpu_semaphore *sema) | ||
166 | { | ||
167 | struct gk20a_sync_pt_inst *pti; | ||
168 | |||
169 | pti = (struct gk20a_sync_pt_inst *) | ||
170 | sync_pt_create(&obj->obj, sizeof(*pti)); | ||
171 | if (!pti) | ||
172 | return NULL; | ||
173 | |||
174 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); | ||
175 | if (!pti->shared) { | ||
176 | sync_pt_free(&pti->pt); | ||
177 | return NULL; | ||
178 | } | ||
179 | return &pti->pt; | ||
180 | } | ||
181 | |||
182 | static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) | ||
183 | { | ||
184 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
185 | if (pt) | ||
186 | nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); | ||
187 | } | ||
188 | |||
189 | static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) | ||
190 | { | ||
191 | struct gk20a_sync_pt_inst *pti; | ||
192 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
193 | |||
194 | pti = (struct gk20a_sync_pt_inst *) | ||
195 | sync_pt_create(&pt->obj->obj, sizeof(*pti)); | ||
196 | if (!pti) | ||
197 | return NULL; | ||
198 | pti->shared = pt; | ||
199 | nvgpu_ref_get(&pt->refcount); | ||
200 | return &pti->pt; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * This function must be able to run on the same sync_pt concurrently. This | ||
205 | * requires a lock to protect access to the sync_pt's internal data structures | ||
206 | * which are modified as a side effect of calling this function. | ||
207 | */ | ||
208 | static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | ||
209 | { | ||
210 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
211 | struct gk20a_sync_timeline *obj = pt->obj; | ||
212 | bool signaled = true; | ||
213 | |||
214 | nvgpu_spinlock_acquire(&pt->lock); | ||
215 | if (!pt->sema) | ||
216 | goto done; | ||
217 | |||
218 | /* Acquired == not realeased yet == active == not signaled. */ | ||
219 | signaled = !nvgpu_semaphore_is_acquired(pt->sema); | ||
220 | |||
221 | if (signaled) { | ||
222 | /* Update min if necessary. */ | ||
223 | if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh, | ||
224 | obj->min) == 1) | ||
225 | obj->min = pt->thresh; | ||
226 | |||
227 | /* Release the semaphore to the pool. */ | ||
228 | nvgpu_semaphore_put(pt->sema); | ||
229 | pt->sema = NULL; | ||
230 | } | ||
231 | done: | ||
232 | nvgpu_spinlock_release(&pt->lock); | ||
233 | |||
234 | return signaled; | ||
235 | } | ||
236 | |||
237 | static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) | ||
238 | { | ||
239 | bool a_expired; | ||
240 | bool b_expired; | ||
241 | struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a); | ||
242 | struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b); | ||
243 | |||
244 | if (WARN_ON(pt_a->obj != pt_b->obj)) | ||
245 | return 0; | ||
246 | |||
247 | /* Early out */ | ||
248 | if (a == b) | ||
249 | return 0; | ||
250 | |||
251 | a_expired = gk20a_sync_pt_has_signaled(a); | ||
252 | b_expired = gk20a_sync_pt_has_signaled(b); | ||
253 | if (a_expired && !b_expired) { | ||
254 | /* Easy, a was earlier */ | ||
255 | return -1; | ||
256 | } else if (!a_expired && b_expired) { | ||
257 | /* Easy, b was earlier */ | ||
258 | return 1; | ||
259 | } | ||
260 | |||
261 | /* Both a and b are expired (trigger before min) or not | ||
262 | * expired (trigger after min), so we can use min | ||
263 | * as a reference value for __gk20a_sync_pt_compare_ref. | ||
264 | */ | ||
265 | return __gk20a_sync_pt_compare_ref(pt_a->obj->min, | ||
266 | pt_a->thresh, pt_b->thresh); | ||
267 | } | ||
268 | |||
269 | static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj) | ||
270 | { | ||
271 | return obj->min; | ||
272 | } | ||
273 | |||
274 | static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, | ||
275 | char *str, int size) | ||
276 | { | ||
277 | struct gk20a_sync_timeline *obj = | ||
278 | (struct gk20a_sync_timeline *)timeline; | ||
279 | snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); | ||
280 | } | ||
281 | |||
282 | static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, | ||
283 | char *str, int size) | ||
284 | { | ||
285 | struct nvgpu_semaphore *s = pt->sema; | ||
286 | |||
287 | snprintf(str, size, "S: pool=%llu [v=%u,r_v=%u]", | ||
288 | s->location.pool->page_idx, | ||
289 | nvgpu_semaphore_get_value(s), | ||
290 | nvgpu_semaphore_read(s)); | ||
291 | } | ||
292 | |||
293 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | ||
294 | int size) | ||
295 | { | ||
296 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
297 | |||
298 | if (pt->sema) { | ||
299 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | snprintf(str, size, "%d", pt->thresh); | ||
304 | } | ||
305 | |||
306 | static const struct sync_timeline_ops gk20a_sync_timeline_ops = { | ||
307 | .driver_name = "nvgpu_semaphore", | ||
308 | .dup = gk20a_sync_pt_dup_inst, | ||
309 | .has_signaled = gk20a_sync_pt_has_signaled, | ||
310 | .compare = gk20a_sync_pt_compare, | ||
311 | .free_pt = gk20a_sync_pt_free_inst, | ||
312 | .timeline_value_str = gk20a_sync_timeline_value_str, | ||
313 | .pt_value_str = gk20a_sync_pt_value_str, | ||
314 | }; | ||
315 | |||
316 | /* Public API */ | ||
317 | |||
318 | struct sync_fence *gk20a_sync_fence_fdget(int fd) | ||
319 | { | ||
320 | struct sync_fence *fence = sync_fence_fdget(fd); | ||
321 | int i; | ||
322 | |||
323 | if (!fence) | ||
324 | return NULL; | ||
325 | |||
326 | for (i = 0; i < fence->num_fences; i++) { | ||
327 | struct sync_pt *spt = sync_pt_from_fence(fence->cbs[i].sync_pt); | ||
328 | struct sync_timeline *t; | ||
329 | |||
330 | if (spt == NULL) { | ||
331 | sync_fence_put(fence); | ||
332 | return NULL; | ||
333 | } | ||
334 | |||
335 | t = sync_pt_parent(spt); | ||
336 | if (t->ops != &gk20a_sync_timeline_ops) { | ||
337 | sync_fence_put(fence); | ||
338 | return NULL; | ||
339 | } | ||
340 | } | ||
341 | |||
342 | return fence; | ||
343 | } | ||
344 | |||
345 | struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) | ||
346 | { | ||
347 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); | ||
348 | struct nvgpu_semaphore *sema; | ||
349 | |||
350 | nvgpu_spinlock_acquire(&pt->lock); | ||
351 | sema = pt->sema; | ||
352 | if (sema) | ||
353 | nvgpu_semaphore_get(sema); | ||
354 | nvgpu_spinlock_release(&pt->lock); | ||
355 | |||
356 | return sema; | ||
357 | } | ||
358 | |||
/*
 * Signals the timeline by forwarding to sync_timeline_signal() with a second
 * argument of 0.
 *
 * NOTE(review): the meaning of that second argument is defined by this
 * tree's sync.h, which is not visible here - confirm against that header.
 */
void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}
363 | |||
/* Destroys the timeline by forwarding to sync_timeline_destroy(). */
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}
368 | |||
369 | struct sync_timeline *gk20a_sync_timeline_create( | ||
370 | const char *name) | ||
371 | { | ||
372 | struct gk20a_sync_timeline *obj; | ||
373 | |||
374 | obj = (struct gk20a_sync_timeline *) | ||
375 | sync_timeline_create(&gk20a_sync_timeline_ops, | ||
376 | sizeof(struct gk20a_sync_timeline), | ||
377 | name); | ||
378 | if (!obj) | ||
379 | return NULL; | ||
380 | obj->max = 0; | ||
381 | obj->min = 0; | ||
382 | return &obj->obj; | ||
383 | } | ||
384 | |||
385 | struct sync_fence *gk20a_sync_fence_create( | ||
386 | struct channel_gk20a *c, | ||
387 | struct nvgpu_semaphore *sema, | ||
388 | const char *fmt, ...) | ||
389 | { | ||
390 | char name[30]; | ||
391 | va_list args; | ||
392 | struct sync_pt *pt; | ||
393 | struct sync_fence *fence; | ||
394 | struct gk20a *g = c->g; | ||
395 | |||
396 | struct nvgpu_channel_linux *os_channel_priv = c->os_priv; | ||
397 | struct nvgpu_os_fence_framework *fence_framework = NULL; | ||
398 | struct gk20a_sync_timeline *timeline = NULL; | ||
399 | |||
400 | fence_framework = &os_channel_priv->fence_framework; | ||
401 | |||
402 | timeline = to_gk20a_timeline(fence_framework->timeline); | ||
403 | |||
404 | pt = gk20a_sync_pt_create_inst(g, timeline, sema); | ||
405 | if (pt == NULL) | ||
406 | return NULL; | ||
407 | |||
408 | va_start(args, fmt); | ||
409 | vsnprintf(name, sizeof(name), fmt, args); | ||
410 | va_end(args); | ||
411 | |||
412 | fence = sync_fence_create(name, pt); | ||
413 | if (fence == NULL) { | ||
414 | sync_pt_free(pt); | ||
415 | return NULL; | ||
416 | } | ||
417 | return fence; | ||
418 | } | ||