summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
diff options
context:
space:
mode:
authorLauri Peltonen <lpeltonen@nvidia.com>2014-02-25 07:44:57 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-03-18 15:09:57 -0400
commit954117fe11481b79c9c525f7c75384ac40b64cbe (patch)
treee252aaef514f3050b8d39eb58f640659df2bda57 /drivers/gpu/nvgpu/gk20a/sync_gk20a.c
parent1c9aaa1eafcf91fbc29404b449f2bec072c804a5 (diff)
gpu: nvgpu: Support sync framework with semaphores
Add sync_gk20a.c/h that support creating Android sync fence fd's from gk20a semaphores. Bug 1445450 Change-Id: I42272996721ceec38ba5510eae6770720bc9dd10 Signed-off-by: Lauri Peltonen <lpeltonen@nvidia.com> Reviewed-on: http://git-master/r/374843 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/sync_gk20a.c408
1 files changed, 408 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
new file mode 100644
index 00000000..1f2eae1a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -0,0 +1,408 @@
1/*
2 * drivers/gpu/nvgpu/gk20a/sync_gk20a.c
3 *
4 * GK20A Sync Framework Integration
5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include "sync_gk20a.h"
19
20#include <linux/kernel.h>
21#include <linux/file.h>
22#include <linux/fs.h>
23#include <linux/hrtimer.h>
24#include <linux/module.h>
25#include <linux/slab.h>
26#include <linux/nvhost_ioctl.h>
27#include "../../../staging/android/sync.h"
28#include "semaphore_gk20a.h"
29
30static const struct sync_timeline_ops gk20a_sync_timeline_ops;
31
32struct gk20a_sync_timeline {
33 struct sync_timeline obj;
34 u32 max;
35 u32 min;
36};
37
38/**
39 * The sync framework dups pts when merging fences. We share a single
40 * refcounted gk20a_sync_pt for each duped pt.
41 */
42struct gk20a_sync_pt {
43 struct kref refcount;
44 u32 thresh;
45 struct gk20a_semaphore *sema;
46 struct gk20a_sync_timeline *obj;
47 struct sync_fence *dep;
48 ktime_t dep_timestamp;
49};
50
51struct gk20a_sync_pt_inst {
52 struct sync_pt pt;
53 struct gk20a_sync_pt *shared;
54};
55
/**
 * Compares sync pt values a and b, both of which will trigger either before
 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
 * ref). Supplying ref allows us to handle wrapping correctly.
 *
 * @ref: reference value both a and b are measured from
 * @a:   first threshold
 * @b:   second threshold
 *
 * Returns -1 if a < b (a triggers before b)
 *          0 if a = b (a and b trigger at the same time)
 *          1 if a > b (b triggers before a)
 */
static int __gk20a_sync_pt_compare_ref(
		u32 ref,
		u32 a,
		u32 b)
{
	/*
	 * We normalize both a and b by subtracting ref from them.
	 * Denote the normalized values by a_n and b_n. Note that because
	 * of wrapping, a_n and/or b_n may be negative when read as signed.
	 *
	 * The normalized values a_n and b_n satisfy:
	 *  - a positive value triggers before a negative value
	 *  - a smaller positive value triggers before a greater positive value
	 *  - a smaller negative value (greater in absolute value) triggers
	 *    before a greater negative value (smaller in absolute value).
	 *
	 * Thus we can just stick to unsigned arithmetic and compare
	 * (u32)a_n to (u32)b_n.
	 *
	 * Just to reiterate the possible cases:
	 *
	 *	1A) ...ref..a....b....
	 *	1B) ...ref..b....a....
	 *	2A) ...b....ref..a....              b_n < 0
	 *	2B) ...a....ref..b....     a_n < 0
	 *	3A) ...a....b....ref..     a_n < 0, b_n < 0
	 *	3B) ...b....a....ref..     a_n < 0, b_n < 0
	 */
	u32 a_n = a - ref;
	u32 b_n = b - ref;

	if (a_n < b_n)
		return -1;
	if (a_n > b_n)
		return 1;
	return 0;
}
102
103struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
104{
105 struct gk20a_sync_pt_inst *pti =
106 container_of(pt, struct gk20a_sync_pt_inst, pt);
107 return pti->shared;
108}
109struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
110{
111 if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
112 return NULL;
113 return (struct gk20a_sync_timeline *)obj;
114}
115
116static void gk20a_sync_pt_free_shared(struct kref *ref)
117{
118 struct gk20a_sync_pt *pt =
119 container_of(ref, struct gk20a_sync_pt, refcount);
120
121 if (pt->dep)
122 sync_fence_put(pt->dep);
123 if (pt->sema)
124 gk20a_semaphore_put(pt->sema);
125 kfree(pt);
126}
127
128static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
129 struct gk20a_sync_timeline *obj,
130 struct gk20a_semaphore *sema,
131 struct sync_fence *dependency)
132{
133 struct gk20a_sync_pt *shared;
134
135 shared = kzalloc(sizeof(*shared), GFP_KERNEL);
136 if (!shared)
137 return NULL;
138
139 kref_init(&shared->refcount);
140 shared->obj = obj;
141 shared->sema = sema;
142 shared->thresh = ++obj->max; /* sync framework has a lock */
143
144 /* Store the dependency fence for this pt. */
145 if (dependency) {
146 if (dependency->status == 0) {
147 shared->dep = dependency;
148 } else {
149 shared->dep_timestamp = ktime_get();
150 sync_fence_put(dependency);
151 }
152 }
153
154 gk20a_semaphore_get(sema);
155 return shared;
156}
157
158static struct sync_pt *gk20a_sync_pt_create_inst(
159 struct gk20a_sync_timeline *obj,
160 struct gk20a_semaphore *sema,
161 struct sync_fence *dependency)
162{
163 struct gk20a_sync_pt_inst *pti;
164
165 pti = (struct gk20a_sync_pt_inst *)
166 sync_pt_create(&obj->obj, sizeof(*pti));
167 if (!pti)
168 return NULL;
169
170 pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency);
171 if (!pti->shared) {
172 sync_pt_free(&pti->pt);
173 return NULL;
174 }
175 return &pti->pt;
176}
177
178static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
179{
180 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
181 if (pt)
182 kref_put(&pt->refcount, gk20a_sync_pt_free_shared);
183}
184
185static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
186{
187 struct gk20a_sync_pt_inst *pti;
188 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
189
190 pti = (struct gk20a_sync_pt_inst *)
191 sync_pt_create(&pt->obj->obj, sizeof(*pti));
192 if (!pti)
193 return NULL;
194 pti->shared = pt;
195 kref_get(&pt->refcount);
196 return &pti->pt;
197}
198
199static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
200{
201 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
202 struct gk20a_sync_timeline *obj = pt->obj;
203 struct sync_pt *pos;
204 bool signaled;
205
206 if (!pt->sema)
207 return true;
208
209 /* Acquired == not realeased yet == active == not signaled. */
210 signaled = !gk20a_semaphore_is_acquired(pt->sema);
211
212 if (signaled) {
213 /* Update min if necessary. */
214 if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
215 obj->min) == 1)
216 obj->min = pt->thresh;
217
218 /* Release the dependency fence, but get its timestamp
219 * first.*/
220 if (pt->dep) {
221 s64 ns = 0;
222 struct list_head *dep_pts = &pt->dep->pt_list_head;
223 list_for_each_entry(pos, dep_pts, pt_list) {
224 ns = max(ns, ktime_to_ns(pos->timestamp));
225 }
226 pt->dep_timestamp = ns_to_ktime(ns);
227 sync_fence_put(pt->dep);
228 pt->dep = NULL;
229 }
230
231 /* Release the semaphore to the pool. */
232 gk20a_semaphore_put(pt->sema);
233 pt->sema = NULL;
234 }
235 return signaled;
236}
237
238static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt)
239{
240 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
241 if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64)
242 return ns_to_ktime(0);
243 return ktime_sub(sync_pt->timestamp, pt->dep_timestamp);
244}
245
246static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
247{
248 bool a_expired;
249 bool b_expired;
250 struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
251 struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);
252
253 if (WARN_ON(pt_a->obj != pt_b->obj))
254 return 0;
255
256 /* Early out */
257 if (a == b)
258 return 0;
259
260 a_expired = gk20a_sync_pt_has_signaled(a);
261 b_expired = gk20a_sync_pt_has_signaled(b);
262 if (a_expired && !b_expired) {
263 /* Easy, a was earlier */
264 return -1;
265 } else if (!a_expired && b_expired) {
266 /* Easy, b was earlier */
267 return 1;
268 }
269
270 /* Both a and b are expired (trigger before min) or not
271 * expired (trigger after min), so we can use min
272 * as a reference value for __gk20a_sync_pt_compare_ref.
273 */
274 return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
275 pt_a->thresh, pt_b->thresh);
276}
277
278static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
279{
280 return obj->min;
281}
282
/*
 * timeline_value_str callback: write the timeline's current value into
 * @str (at most @size bytes, NUL-terminated by snprintf).
 */
static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *obj =
		(struct gk20a_sync_timeline *)timeline;

	/*
	 * The value is a u32 that is expected to wrap, so print it as
	 * unsigned; "%d" would show the upper half of the range as negative.
	 */
	snprintf(str, size, "%u", gk20a_sync_timeline_current(obj));
}
290
291static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
292 int size)
293{
294 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
295 ktime_t dur = gk20a_sync_pt_duration(sync_pt);
296 if (pt->dep) {
297 snprintf(str, size, "(dep: [%p] %s) %d",
298 pt->dep, pt->dep->name, pt->thresh);
299 } else if (dur.tv64) {
300 struct timeval tv = ktime_to_timeval(dur);
301 snprintf(str, size, "(took %ld.%03ld ms) %d",
302 tv.tv_sec * 1000 + tv.tv_usec / 1000,
303 tv.tv_usec % 1000,
304 pt->thresh);
305 } else {
306 snprintf(str, size, "%d", pt->thresh);
307 }
308}
309
310static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt,
311 void *data, int size)
312{
313 struct gk20a_sync_pt_info info;
314
315 if (size < sizeof(info))
316 return -ENOMEM;
317
318 info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt));
319 memcpy(data, &info, sizeof(info));
320
321 return sizeof(info);
322}
323
324static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
325 .driver_name = "gk20a_semaphore",
326 .dup = gk20a_sync_pt_dup_inst,
327 .has_signaled = gk20a_sync_pt_has_signaled,
328 .compare = gk20a_sync_pt_compare,
329 .free_pt = gk20a_sync_pt_free_inst,
330 .fill_driver_data = gk20a_sync_fill_driver_data,
331 .timeline_value_str = gk20a_sync_timeline_value_str,
332 .pt_value_str = gk20a_sync_pt_value_str,
333};
334
335/* Public API */
336
/*
 * Look up the sync fence behind file descriptor @fd. Thin wrapper that
 * returns whatever sync_fence_fdget() returns.
 */
struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	struct sync_fence *fence = sync_fence_fdget(fd);

	return fence;
}
341
/*
 * Notify the sync framework that @timeline may have advanced. Thin wrapper
 * around sync_timeline_signal().
 */
void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline);
}
346
/*
 * Destroy @timeline. Thin wrapper around sync_timeline_destroy().
 */
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}
351
352struct sync_timeline *gk20a_sync_timeline_create(
353 const char *fmt, ...)
354{
355 struct gk20a_sync_timeline *obj;
356 char name[30];
357 va_list args;
358
359 va_start(args, fmt);
360 vsnprintf(name, sizeof(name), fmt, args);
361 va_end(args);
362
363 obj = (struct gk20a_sync_timeline *)
364 sync_timeline_create(&gk20a_sync_timeline_ops,
365 sizeof(struct gk20a_sync_timeline),
366 name);
367 if (!obj)
368 return NULL;
369 obj->max = 0;
370 obj->min = 0;
371 return &obj->obj;
372}
373
/*
 * Create a sync fence backed by @sema on timeline @obj and install it on a
 * new file descriptor.
 *
 * @obj:        timeline created by gk20a_sync_timeline_create()
 * @sema:       semaphore whose release signals the fence
 * @dependency: optional fence this work depends on (may be NULL); it is
 *              handed to the new sync point
 * @fmt:        printf-style format for the fence name; the formatted name
 *              is truncated to fit a 30-byte buffer
 *
 * Returns the new fd on success, -EINVAL if @obj is not a gk20a timeline,
 * -ENOMEM if the sync point or fence cannot be allocated, or the negative
 * error from get_unused_fd().
 */
int gk20a_sync_fence_create(struct sync_timeline *obj,
		struct gk20a_semaphore *sema,
		struct sync_fence *dependency,
		const char *fmt, ...)
{
	int fd;
	char name[30];
	va_list args;
	struct sync_pt *pt;
	struct sync_fence *fence;
	struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj);

	/*
	 * to_gk20a_timeline() warns and returns NULL for a foreign timeline;
	 * bail out instead of dereferencing it during pt creation.
	 */
	if (!timeline)
		return -EINVAL;

	pt = gk20a_sync_pt_create_inst(timeline, sema, dependency);
	if (pt == NULL)
		return -ENOMEM;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		/* The fence never took the pt, so release it here. */
		sync_pt_free(pt);
		return -ENOMEM;
	}

	fd = get_unused_fd();
	if (fd < 0) {
		/* Dropping the fence is the only cleanup done on this path. */
		sync_fence_put(fence);
		return fd;
	}

	sync_fence_install(fence, fd);
	return fd;
}