diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c new file mode 100644 index 00000000..277b3334 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |||
@@ -0,0 +1,436 @@ | |||
1 | /* | ||
2 | * GK20A Sync Framework Integration | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/file.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <linux/hrtimer.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <nvgpu/lock.h> | ||
30 | #include <uapi/linux/nvgpu.h> | ||
31 | |||
32 | #include <nvgpu/kmem.h> | ||
33 | #include <nvgpu/semaphore.h> | ||
34 | #include <nvgpu/bug.h> | ||
35 | #include <nvgpu/kref.h> | ||
36 | |||
37 | #include "../drivers/staging/android/sync.h" | ||
38 | |||
39 | #include "sync_gk20a.h" | ||
40 | |||
41 | static const struct sync_timeline_ops gk20a_sync_timeline_ops; | ||
42 | |||
/*
 * nvgpu-specific timeline wrapper. 'obj' must be the first member: the code
 * upcasts between sync_timeline* and gk20a_sync_timeline* with plain casts.
 */
struct gk20a_sync_timeline {
	struct sync_timeline		obj;
	u32				max;	/* most recently issued threshold (see pt_create_shared) */
	u32				min;	/* threshold of newest pt observed signaled */
};
48 | |||
/**
 * The sync framework dups pts when merging fences. We share a single
 * refcounted gk20a_sync_pt for each duped pt.
 */
struct gk20a_sync_pt {
	struct gk20a			*g;		/* owning GPU, used for kfree */
	struct nvgpu_ref		refcount;	/* shared across duped instances */
	u32				thresh;		/* timeline value at which this pt triggers */
	struct nvgpu_semaphore		*sema;		/* backing semaphore; NULL once signaled */
	struct gk20a_sync_timeline	*obj;		/* parent timeline */

	/*
	 * Use a spin lock here since it will have better performance
	 * than a mutex - there should be very little contention on this
	 * lock.
	 */
	struct nvgpu_spinlock		lock;
};
67 | |||
/*
 * Per-instance wrapper handed to the sync framework. 'pt' must be the first
 * member so container_of()/pointer casts between sync_pt and this struct work.
 * Multiple instances (created by dup) point at one shared gk20a_sync_pt.
 */
struct gk20a_sync_pt_inst {
	struct sync_pt			pt;
	struct gk20a_sync_pt		*shared;
};
72 | |||
73 | /** | ||
74 | * Check if the passed sync_fence is backed by a single GPU semaphore. In such | ||
75 | * cases we can short circuit a lot of SW involved in signaling pre-fences and | ||
76 | * post fences. | ||
77 | * | ||
78 | * For now reject multi-sync_pt fences. This could be changed in future. It | ||
79 | * would require that the sema fast path push a sema acquire for each semaphore | ||
80 | * in the fence. | ||
81 | */ | ||
82 | int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence) | ||
83 | { | ||
84 | struct sync_timeline *t; | ||
85 | |||
86 | struct fence *pt = fence->cbs[0].sync_pt; | ||
87 | struct sync_pt *spt = sync_pt_from_fence(pt); | ||
88 | |||
89 | if (fence->num_fences != 1) | ||
90 | return 0; | ||
91 | |||
92 | if (spt == NULL) | ||
93 | return 0; | ||
94 | |||
95 | t = sync_pt_parent(spt); | ||
96 | |||
97 | if (t->ops == &gk20a_sync_timeline_ops) | ||
98 | return 1; | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | struct nvgpu_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f) | ||
103 | { | ||
104 | struct sync_pt *spt; | ||
105 | struct gk20a_sync_pt_inst *pti; | ||
106 | |||
107 | struct fence *pt; | ||
108 | |||
109 | if (!f) | ||
110 | return NULL; | ||
111 | |||
112 | if (!gk20a_is_sema_backed_sync_fence(f)) | ||
113 | return NULL; | ||
114 | |||
115 | pt = f->cbs[0].sync_pt; | ||
116 | spt = sync_pt_from_fence(pt); | ||
117 | pti = container_of(spt, struct gk20a_sync_pt_inst, pt); | ||
118 | |||
119 | return pti->shared->sema; | ||
120 | } | ||
121 | |||
122 | /** | ||
123 | * Compares sync pt values a and b, both of which will trigger either before | ||
124 | * or after ref (i.e. a and b trigger before ref, or a and b trigger after | ||
125 | * ref). Supplying ref allows us to handle wrapping correctly. | ||
126 | * | ||
127 | * Returns -1 if a < b (a triggers before b) | ||
128 | * 0 if a = b (a and b trigger at the same time) | ||
129 | * 1 if a > b (b triggers before a) | ||
130 | */ | ||
131 | static int __gk20a_sync_pt_compare_ref( | ||
132 | u32 ref, | ||
133 | u32 a, | ||
134 | u32 b) | ||
135 | { | ||
136 | /* | ||
137 | * We normalize both a and b by subtracting ref from them. | ||
138 | * Denote the normalized values by a_n and b_n. Note that because | ||
139 | * of wrapping, a_n and/or b_n may be negative. | ||
140 | * | ||
141 | * The normalized values a_n and b_n satisfy: | ||
142 | * - a positive value triggers before a negative value | ||
143 | * - a smaller positive value triggers before a greater positive value | ||
144 | * - a smaller negative value (greater in absolute value) triggers | ||
145 | * before a greater negative value (smaller in absolute value). | ||
146 | * | ||
147 | * Thus we can just stick to unsigned arithmetic and compare | ||
148 | * (u32)a_n to (u32)b_n. | ||
149 | * | ||
150 | * Just to reiterate the possible cases: | ||
151 | * | ||
152 | * 1A) ...ref..a....b.... | ||
153 | * 1B) ...ref..b....a.... | ||
154 | * 2A) ...b....ref..a.... b_n < 0 | ||
155 | * 2B) ...a....ref..b.... a_n > 0 | ||
156 | * 3A) ...a....b....ref.. a_n < 0, b_n < 0 | ||
157 | * 3A) ...b....a....ref.. a_n < 0, b_n < 0 | ||
158 | */ | ||
159 | u32 a_n = a - ref; | ||
160 | u32 b_n = b - ref; | ||
161 | if (a_n < b_n) | ||
162 | return -1; | ||
163 | else if (a_n > b_n) | ||
164 | return 1; | ||
165 | else | ||
166 | return 0; | ||
167 | } | ||
168 | |||
/* Map a framework sync_pt to the shared gk20a_sync_pt it belongs to. */
static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
{
	struct gk20a_sync_pt_inst *pti =
			container_of(pt, struct gk20a_sync_pt_inst, pt);
	return pti->shared;
}
/*
 * Downcast a sync_timeline to our wrapper, returning NULL (with a WARN) if
 * the timeline was not created with gk20a_sync_timeline_ops.
 */
static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
{
	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
		return NULL;
	return (struct gk20a_sync_timeline *)obj;
}
181 | |||
182 | static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) | ||
183 | { | ||
184 | struct gk20a_sync_pt *pt = | ||
185 | container_of(ref, struct gk20a_sync_pt, refcount); | ||
186 | struct gk20a *g = pt->g; | ||
187 | |||
188 | if (pt->sema) | ||
189 | nvgpu_semaphore_put(pt->sema); | ||
190 | nvgpu_kfree(g, pt); | ||
191 | } | ||
192 | |||
193 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | ||
194 | struct gk20a *g, | ||
195 | struct gk20a_sync_timeline *obj, | ||
196 | struct nvgpu_semaphore *sema) | ||
197 | { | ||
198 | struct gk20a_sync_pt *shared; | ||
199 | |||
200 | shared = nvgpu_kzalloc(g, sizeof(*shared)); | ||
201 | if (!shared) | ||
202 | return NULL; | ||
203 | |||
204 | nvgpu_ref_init(&shared->refcount); | ||
205 | shared->g = g; | ||
206 | shared->obj = obj; | ||
207 | shared->sema = sema; | ||
208 | shared->thresh = ++obj->max; /* sync framework has a lock */ | ||
209 | |||
210 | nvgpu_spinlock_init(&shared->lock); | ||
211 | |||
212 | nvgpu_semaphore_get(sema); | ||
213 | |||
214 | return shared; | ||
215 | } | ||
216 | |||
217 | static struct sync_pt *gk20a_sync_pt_create_inst( | ||
218 | struct gk20a *g, | ||
219 | struct gk20a_sync_timeline *obj, | ||
220 | struct nvgpu_semaphore *sema) | ||
221 | { | ||
222 | struct gk20a_sync_pt_inst *pti; | ||
223 | |||
224 | pti = (struct gk20a_sync_pt_inst *) | ||
225 | sync_pt_create(&obj->obj, sizeof(*pti)); | ||
226 | if (!pti) | ||
227 | return NULL; | ||
228 | |||
229 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); | ||
230 | if (!pti->shared) { | ||
231 | sync_pt_free(&pti->pt); | ||
232 | return NULL; | ||
233 | } | ||
234 | return &pti->pt; | ||
235 | } | ||
236 | |||
237 | static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) | ||
238 | { | ||
239 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
240 | if (pt) | ||
241 | nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); | ||
242 | } | ||
243 | |||
244 | static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) | ||
245 | { | ||
246 | struct gk20a_sync_pt_inst *pti; | ||
247 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
248 | |||
249 | pti = (struct gk20a_sync_pt_inst *) | ||
250 | sync_pt_create(&pt->obj->obj, sizeof(*pti)); | ||
251 | if (!pti) | ||
252 | return NULL; | ||
253 | pti->shared = pt; | ||
254 | nvgpu_ref_get(&pt->refcount); | ||
255 | return &pti->pt; | ||
256 | } | ||
257 | |||
/*
 * This function must be able to run on the same sync_pt concurrently. This
 * requires a lock to protect access to the sync_pt's internal data structures
 * which are modified as a side effect of calling this function.
 *
 * Returns nonzero once the backing semaphore has been released. The first
 * call that observes the signal advances the timeline's min value and drops
 * the semaphore reference (pt->sema becomes NULL), so later calls take the
 * early-out path.
 */
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	struct gk20a_sync_timeline *obj = pt->obj;
	bool signaled = true;

	nvgpu_spinlock_acquire(&pt->lock);
	if (!pt->sema)
		goto done;	/* semaphore already released: signaled earlier */

	/* Acquired == not released yet == active == not signaled. */
	signaled = !nvgpu_semaphore_is_acquired(pt->sema);

	if (signaled) {
		/* Update min if necessary. */
		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
				obj->min) == 1)
			obj->min = pt->thresh;

		/* Release the semaphore to the pool. */
		nvgpu_semaphore_put(pt->sema);
		pt->sema = NULL;
	}
done:
	nvgpu_spinlock_release(&pt->lock);

	return signaled;
}
291 | |||
/*
 * compare op: order two sync_pts on the same timeline, returning -1/0/1.
 * Note that calling gk20a_sync_pt_has_signaled() below is not a pure query:
 * it may update each pt's state and advance obj->min as a side effect.
 */
static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	/* Comparing pts from different timelines is not meaningful. */
	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/* Both a and b are expired (trigger before min) or not
	 * expired (trigger after min), so we can use min
	 * as a reference value for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}
323 | |||
/* Current timeline value: the threshold of the newest signaled pt. */
static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}
328 | |||
/* timeline_value_str op: render the current timeline value for debugfs. */
static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *tl =
		(struct gk20a_sync_timeline *)timeline;

	snprintf(str, size, "%d", gk20a_sync_timeline_current(tl));
}
336 | |||
337 | static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, | ||
338 | char *str, int size) | ||
339 | { | ||
340 | struct nvgpu_semaphore *s = pt->sema; | ||
341 | |||
342 | snprintf(str, size, "S: c=%d [v=%u,r_v=%u]", | ||
343 | s->hw_sema->ch->chid, | ||
344 | nvgpu_semaphore_get_value(s), | ||
345 | nvgpu_semaphore_read(s)); | ||
346 | } | ||
347 | |||
348 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | ||
349 | int size) | ||
350 | { | ||
351 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
352 | |||
353 | if (pt->sema) { | ||
354 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | ||
355 | return; | ||
356 | } | ||
357 | |||
358 | snprintf(str, size, "%d", pt->thresh); | ||
359 | } | ||
360 | |||
/* Hooks wiring nvgpu semaphores into the Android sync framework. */
static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};
370 | |||
371 | /* Public API */ | ||
372 | |||
/* Look up a sync_fence from a user-supplied fd; thin sync framework wrapper. */
struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	return sync_fence_fdget(fd);
}
377 | |||
/* Re-evaluate the timeline's pts; has_signaled updates state as needed. */
void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}
382 | |||
/* Destroy a timeline created by gk20a_sync_timeline_create(). */
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}
387 | |||
388 | struct sync_timeline *gk20a_sync_timeline_create( | ||
389 | const char *fmt, ...) | ||
390 | { | ||
391 | struct gk20a_sync_timeline *obj; | ||
392 | char name[30]; | ||
393 | va_list args; | ||
394 | |||
395 | va_start(args, fmt); | ||
396 | vsnprintf(name, sizeof(name), fmt, args); | ||
397 | va_end(args); | ||
398 | |||
399 | obj = (struct gk20a_sync_timeline *) | ||
400 | sync_timeline_create(&gk20a_sync_timeline_ops, | ||
401 | sizeof(struct gk20a_sync_timeline), | ||
402 | name); | ||
403 | if (!obj) | ||
404 | return NULL; | ||
405 | obj->max = 0; | ||
406 | obj->min = 0; | ||
407 | return &obj->obj; | ||
408 | } | ||
409 | |||
/*
 * Create a sync_fence wrapping a single new pt backed by @sema on timeline
 * @obj. The fence name is built printf-style (truncated to 30 bytes).
 * Returns NULL on failure; the pt is cleaned up if fence creation fails.
 */
struct sync_fence *gk20a_sync_fence_create(
		struct gk20a *g,
		struct sync_timeline *obj,
		struct nvgpu_semaphore *sema,
		const char *fmt, ...)
{
	struct gk20a_sync_timeline *tl = to_gk20a_timeline(obj);
	struct sync_fence *fence;
	struct sync_pt *pt;
	char name[30];
	va_list args;

	pt = gk20a_sync_pt_create_inst(g, tl, sema);
	if (pt == NULL)
		return NULL;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		/* Fence owns the pt only on success; free it ourselves. */
		sync_pt_free(pt);
		return NULL;
	}

	return fence;
}