Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/sync_gk20a.c	436
1 file changed, 436 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
new file mode 100644
index 00000000..277b3334
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -0,0 +1,436 @@
/*
 * GK20A Sync Framework Integration
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <nvgpu/lock.h>
#include <uapi/linux/nvgpu.h>

#include <nvgpu/kmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>

#include "../drivers/staging/android/sync.h"

#include "sync_gk20a.h"

static const struct sync_timeline_ops gk20a_sync_timeline_ops;

struct gk20a_sync_timeline {
	struct sync_timeline obj;
	u32 max;
	u32 min;
};

/**
 * The sync framework dups pts when merging fences. We share a single
 * refcounted gk20a_sync_pt for each duped pt.
 */
struct gk20a_sync_pt {
	struct gk20a *g;
	struct nvgpu_ref refcount;
	u32 thresh;
	struct nvgpu_semaphore *sema;
	struct gk20a_sync_timeline *obj;

	/*
	 * Use a spin lock here since it will have better performance
	 * than a mutex - there should be very little contention on this
	 * lock.
	 */
	struct nvgpu_spinlock lock;
};

struct gk20a_sync_pt_inst {
	struct sync_pt pt;
	struct gk20a_sync_pt *shared;
};
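
/*
 * Illustration (added note): after the framework dups a pt, e.g. while
 * merging two fences, both sync_pt instances point at one shared
 * gk20a_sync_pt, whose refcount then reads 2:
 *
 *	sync_pt_inst A --+
 *	                 +--> gk20a_sync_pt (refcount = 2, holds sema ref)
 *	sync_pt_inst B --+
 */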

/**
 * Check if the passed sync_fence is backed by a single GPU semaphore. In such
 * cases we can short circuit a lot of SW involved in signaling pre-fences and
 * post fences.
 *
 * For now reject multi-sync_pt fences. This could be changed in the future.
 * It would require that the sema fast path push a sema acquire for each
 * semaphore in the fence.
 */
int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence)
{
	struct sync_timeline *t;
	struct fence *pt;
	struct sync_pt *spt;

	/*
	 * Check the count before touching cbs[0]: an empty fence has no
	 * valid sync_pt to dereference.
	 */
	if (fence->num_fences != 1)
		return 0;

	pt = fence->cbs[0].sync_pt;
	spt = sync_pt_from_fence(pt);
	if (spt == NULL)
		return 0;

	t = sync_pt_parent(spt);

	if (t->ops == &gk20a_sync_timeline_ops)
		return 1;
	return 0;
}

struct nvgpu_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f)
{
	struct sync_pt *spt;
	struct gk20a_sync_pt_inst *pti;
	struct fence *pt;

	if (!f)
		return NULL;

	if (!gk20a_is_sema_backed_sync_fence(f))
		return NULL;

	pt = f->cbs[0].sync_pt;
	spt = sync_pt_from_fence(pt);
	pti = container_of(spt, struct gk20a_sync_pt_inst, pt);

	return pti->shared->sema;
}
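
/*
 * Illustrative sketch, not a driver entry point: one way a caller might
 * combine the two helpers above. When a single GPU semaphore backs the
 * fence, a real caller would push a HW sema acquire instead of blocking;
 * here the fast path simply returns. The 500 ms timeout is arbitrary.
 */
static int __maybe_unused gk20a_example_wait_fence(struct sync_fence *f)
{
	struct nvgpu_semaphore *sema = gk20a_sync_fence_get_sema(f);

	if (sema) {
		/*
		 * Fast path: hand "sema" to the channel's pushbuffer
		 * logic (HW waits, CPU does not block).
		 */
		return 0;
	}

	/* Slow path: CPU-side wait until every pt in the fence signals. */
	return sync_fence_wait(f, 500);
}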

/**
 * Compares sync pt values a and b, both of which will trigger either before
 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
 * ref). Supplying ref allows us to handle wrapping correctly.
 *
 * Returns -1 if a < b (a triggers before b)
 *          0 if a = b (a and b trigger at the same time)
 *          1 if a > b (b triggers before a)
 */
static int __gk20a_sync_pt_compare_ref(
	u32 ref,
	u32 a,
	u32 b)
{
	/*
	 * We normalize both a and b by subtracting ref from them.
	 * Denote the normalized values by a_n and b_n. Note that because
	 * of wrapping, a_n and/or b_n may be negative.
	 *
	 * The normalized values a_n and b_n satisfy:
	 * - a positive value triggers before a negative value
	 * - a smaller positive value triggers before a greater positive value
	 * - a smaller negative value (greater in absolute value) triggers
	 *   before a greater negative value (smaller in absolute value).
	 *
	 * Thus we can just stick to unsigned arithmetic and compare
	 * (u32)a_n to (u32)b_n.
	 *
	 * Just to reiterate the possible cases:
	 *
	 *	1A) ...ref..a....b....
	 *	1B) ...ref..b....a....
	 *	2A) ...b....ref..a....	b_n < 0
	 *	2B) ...a....ref..b....	a_n > 0
	 *	3A) ...a....b....ref..	a_n < 0, b_n < 0
	 *	3B) ...b....a....ref..	a_n < 0, b_n < 0
	 */
	u32 a_n = a - ref;
	u32 b_n = b - ref;

	if (a_n < b_n)
		return -1;
	else if (a_n > b_n)
		return 1;
	else
		return 0;
}
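
/*
 * Worked example for the wrap handling above: with ref = 0xfffffff0,
 * a = 0xfffffff8 and b = 0x00000008, b has wrapped past zero yet still
 * triggers after a. Normalizing gives a_n = 0x08 and b_n = 0x18, so
 * a_n < b_n and the function returns -1, whereas a naive compare of the
 * raw values (a > b) would order them incorrectly.
 */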

static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
{
	struct gk20a_sync_pt_inst *pti =
		container_of(pt, struct gk20a_sync_pt_inst, pt);
	return pti->shared;
}

static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
{
	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
		return NULL;
	return (struct gk20a_sync_timeline *)obj;
}

static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
{
	struct gk20a_sync_pt *pt =
		container_of(ref, struct gk20a_sync_pt, refcount);
	struct gk20a *g = pt->g;

	if (pt->sema)
		nvgpu_semaphore_put(pt->sema);
	nvgpu_kfree(g, pt);
}

static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt *shared;

	shared = nvgpu_kzalloc(g, sizeof(*shared));
	if (!shared)
		return NULL;

	nvgpu_ref_init(&shared->refcount);
	shared->g = g;
	shared->obj = obj;
	shared->sema = sema;
	shared->thresh = ++obj->max; /* sync framework has a lock */

	nvgpu_spinlock_init(&shared->lock);

	nvgpu_semaphore_get(sema);

	return shared;
}

static struct sync_pt *gk20a_sync_pt_create_inst(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt_inst *pti;

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;

	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
	if (!pti->shared) {
		sync_pt_free(&pti->pt);
		return NULL;
	}
	return &pti->pt;
}

static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	if (pt)
		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
}

static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt_inst *pti;
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&pt->obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;
	pti->shared = pt;
	nvgpu_ref_get(&pt->refcount);
	return &pti->pt;
}

/*
 * This function may run concurrently on the same sync_pt, so a lock
 * protects the sync_pt's internal data structures, which are modified
 * as a side effect of calling it.
 */
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	struct gk20a_sync_timeline *obj = pt->obj;
	bool signaled = true;

	nvgpu_spinlock_acquire(&pt->lock);
	if (!pt->sema)
		goto done;

	/* Acquired == not released yet == active == not signaled. */
	signaled = !nvgpu_semaphore_is_acquired(pt->sema);

	if (signaled) {
		/* Update min if necessary. */
		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
				obj->min) == 1)
			obj->min = pt->thresh;

		/* Release the semaphore to the pool. */
		nvgpu_semaphore_put(pt->sema);
		pt->sema = NULL;
	}
done:
	nvgpu_spinlock_release(&pt->lock);

	return signaled;
}

static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/*
	 * Both a and b are expired (trigger before min) or not expired
	 * (trigger after min), so we can use min as a reference value
	 * for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}

static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}

static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *obj =
		(struct gk20a_sync_timeline *)timeline;
	snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
}

static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
		char *str, int size)
{
	struct nvgpu_semaphore *s = pt->sema;

	snprintf(str, size, "S: c=%d [v=%u,r_v=%u]",
		s->hw_sema->ch->chid,
		nvgpu_semaphore_get_value(s),
		nvgpu_semaphore_read(s));
}

static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
		int size)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	if (pt->sema) {
		gk20a_sync_pt_value_str_for_sema(pt, str, size);
		return;
	}

	snprintf(str, size, "%d", pt->thresh);
}

static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};
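
/*
 * Note (assumption about the surrounding kernel): the sync framework's
 * debugfs dump in the same staging directory prints fences via
 * timeline_value_str() and pt_value_str(), so the "S: c=..." strings
 * above are what appear when inspecting stuck fences from userspace.
 */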

/* Public API */

struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	return sync_fence_fdget(fd);
}

void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}

void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}

struct sync_timeline *gk20a_sync_timeline_create(
		const char *fmt, ...)
{
	struct gk20a_sync_timeline *obj;
	char name[30];
	va_list args;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	obj = (struct gk20a_sync_timeline *)
		sync_timeline_create(&gk20a_sync_timeline_ops,
				     sizeof(struct gk20a_sync_timeline),
				     name);
	if (!obj)
		return NULL;
	obj->max = 0;
	obj->min = 0;
	return &obj->obj;
}

struct sync_fence *gk20a_sync_fence_create(
		struct gk20a *g,
		struct sync_timeline *obj,
		struct nvgpu_semaphore *sema,
		const char *fmt, ...)
{
	char name[30];
	va_list args;
	struct sync_pt *pt;
	struct sync_fence *fence;
	struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj);

	/* to_gk20a_timeline() WARNs and returns NULL on a foreign timeline. */
	if (timeline == NULL)
		return NULL;

	pt = gk20a_sync_pt_create_inst(g, timeline, sema);
	if (pt == NULL)
		return NULL;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		sync_pt_free(pt);
		return NULL;
	}
	return fence;
}
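
/*
 * Illustrative sketch, not a driver entry point: wiring the public API
 * together the way a channel-sync implementation might. The caller is
 * assumed to own a timeline and a semaphore; fd handling follows the
 * usual Android sync pattern of installing the fence on an unused fd.
 */
static int __maybe_unused gk20a_example_post_fence_fd(struct gk20a *g,
		struct sync_timeline *tl, struct nvgpu_semaphore *sema)
{
	struct sync_fence *f;
	int fd;

	f = gk20a_sync_fence_create(g, tl, sema, "gk20a-example");
	if (!f)
		return -ENOMEM;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		sync_fence_put(f);
		return fd;
	}

	/* Transfers the fence reference to the fd. */
	sync_fence_install(f, fd);
	return fd;
}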