diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 408 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.h | 46 |
3 files changed, 455 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile index e5eb817d..341f8418 100644 --- a/drivers/gpu/nvgpu/gk20a/Makefile +++ b/drivers/gpu/nvgpu/gk20a/Makefile | |||
@@ -34,5 +34,6 @@ nvgpu-y := \ | |||
34 | gk20a_allocator.o \ | 34 | gk20a_allocator.o \ |
35 | platform_gk20a_generic.o | 35 | platform_gk20a_generic.o |
36 | nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o | 36 | nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o |
37 | nvgpu-$(CONFIG_SYNC) += sync_gk20a.o | ||
37 | 38 | ||
38 | obj-$(CONFIG_GK20A) := nvgpu.o | 39 | obj-$(CONFIG_GK20A) := nvgpu.o |
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c new file mode 100644 index 00000000..1f2eae1a --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |||
@@ -0,0 +1,408 @@ | |||
1 | /* | ||
2 | * drivers/gpu/nvgpu/gk20a/sync_gk20a.c | ||
3 | * | ||
4 | * GK20A Sync Framework Integration | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #include "sync_gk20a.h" | ||
19 | |||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/file.h> | ||
22 | #include <linux/fs.h> | ||
23 | #include <linux/hrtimer.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/nvhost_ioctl.h> | ||
27 | #include "../../../staging/android/sync.h" | ||
28 | #include "semaphore_gk20a.h" | ||
29 | |||
30 | static const struct sync_timeline_ops gk20a_sync_timeline_ops; | ||
31 | |||
32 | struct gk20a_sync_timeline { | ||
33 | struct sync_timeline obj; | ||
34 | u32 max; | ||
35 | u32 min; | ||
36 | }; | ||
37 | |||
38 | /** | ||
39 | * The sync framework dups pts when merging fences. We share a single | ||
40 | * refcounted gk20a_sync_pt for each duped pt. | ||
41 | */ | ||
42 | struct gk20a_sync_pt { | ||
43 | struct kref refcount; | ||
44 | u32 thresh; | ||
45 | struct gk20a_semaphore *sema; | ||
46 | struct gk20a_sync_timeline *obj; | ||
47 | struct sync_fence *dep; | ||
48 | ktime_t dep_timestamp; | ||
49 | }; | ||
50 | |||
51 | struct gk20a_sync_pt_inst { | ||
52 | struct sync_pt pt; | ||
53 | struct gk20a_sync_pt *shared; | ||
54 | }; | ||
55 | |||
/**
 * Orders two sync pt values a and b as seen from a reference value ref.
 *
 * Both a and b must lie on the same side of ref (both trigger before ref,
 * or both trigger after ref). Measuring from ref is what lets the
 * comparison survive u32 wrap-around.
 *
 * Returns -1 if a < b (a triggers before b)
 *          0 if a = b (a and b trigger at the same time)
 *          1 if a > b (b triggers before a)
 */
static int __gk20a_sync_pt_compare_ref(
		u32 ref,
		u32 a,
		u32 b)
{
	/*
	 * Normalize by subtracting ref: a_n = a - ref, b_n = b - ref.
	 * Read as signed numbers, either may be negative because of
	 * wrapping. The normalized values obey:
	 *  - a positive value triggers before a negative value
	 *  - a smaller positive value triggers before a greater positive one
	 *  - a smaller negative value (greater in absolute value) triggers
	 *    before a greater negative value (smaller in absolute value).
	 *
	 * That is exactly the ordering of the same bit patterns read as
	 * unsigned, so an unsigned comparison of a_n and b_n is sufficient.
	 *
	 * The possible layouts:
	 *
	 *   1A) ...ref..a....b....
	 *   1B) ...ref..b....a....
	 *   2A) ...b....ref..a....   b_n < 0
	 *   2B) ...a....ref..b....   a_n < 0
	 *   3A) ...a....b....ref..   a_n < 0, b_n < 0
	 *   3B) ...b....a....ref..   a_n < 0, b_n < 0
	 */
	const u32 a_n = a - ref;
	const u32 b_n = b - ref;

	/* Branch-free three-way compare: yields -1, 0 or 1. */
	return (a_n > b_n) - (a_n < b_n);
}
102 | |||
103 | struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt) | ||
104 | { | ||
105 | struct gk20a_sync_pt_inst *pti = | ||
106 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
107 | return pti->shared; | ||
108 | } | ||
109 | struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj) | ||
110 | { | ||
111 | if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops)) | ||
112 | return NULL; | ||
113 | return (struct gk20a_sync_timeline *)obj; | ||
114 | } | ||
115 | |||
116 | static void gk20a_sync_pt_free_shared(struct kref *ref) | ||
117 | { | ||
118 | struct gk20a_sync_pt *pt = | ||
119 | container_of(ref, struct gk20a_sync_pt, refcount); | ||
120 | |||
121 | if (pt->dep) | ||
122 | sync_fence_put(pt->dep); | ||
123 | if (pt->sema) | ||
124 | gk20a_semaphore_put(pt->sema); | ||
125 | kfree(pt); | ||
126 | } | ||
127 | |||
128 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | ||
129 | struct gk20a_sync_timeline *obj, | ||
130 | struct gk20a_semaphore *sema, | ||
131 | struct sync_fence *dependency) | ||
132 | { | ||
133 | struct gk20a_sync_pt *shared; | ||
134 | |||
135 | shared = kzalloc(sizeof(*shared), GFP_KERNEL); | ||
136 | if (!shared) | ||
137 | return NULL; | ||
138 | |||
139 | kref_init(&shared->refcount); | ||
140 | shared->obj = obj; | ||
141 | shared->sema = sema; | ||
142 | shared->thresh = ++obj->max; /* sync framework has a lock */ | ||
143 | |||
144 | /* Store the dependency fence for this pt. */ | ||
145 | if (dependency) { | ||
146 | if (dependency->status == 0) { | ||
147 | shared->dep = dependency; | ||
148 | } else { | ||
149 | shared->dep_timestamp = ktime_get(); | ||
150 | sync_fence_put(dependency); | ||
151 | } | ||
152 | } | ||
153 | |||
154 | gk20a_semaphore_get(sema); | ||
155 | return shared; | ||
156 | } | ||
157 | |||
158 | static struct sync_pt *gk20a_sync_pt_create_inst( | ||
159 | struct gk20a_sync_timeline *obj, | ||
160 | struct gk20a_semaphore *sema, | ||
161 | struct sync_fence *dependency) | ||
162 | { | ||
163 | struct gk20a_sync_pt_inst *pti; | ||
164 | |||
165 | pti = (struct gk20a_sync_pt_inst *) | ||
166 | sync_pt_create(&obj->obj, sizeof(*pti)); | ||
167 | if (!pti) | ||
168 | return NULL; | ||
169 | |||
170 | pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); | ||
171 | if (!pti->shared) { | ||
172 | sync_pt_free(&pti->pt); | ||
173 | return NULL; | ||
174 | } | ||
175 | return &pti->pt; | ||
176 | } | ||
177 | |||
178 | static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) | ||
179 | { | ||
180 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
181 | if (pt) | ||
182 | kref_put(&pt->refcount, gk20a_sync_pt_free_shared); | ||
183 | } | ||
184 | |||
185 | static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) | ||
186 | { | ||
187 | struct gk20a_sync_pt_inst *pti; | ||
188 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
189 | |||
190 | pti = (struct gk20a_sync_pt_inst *) | ||
191 | sync_pt_create(&pt->obj->obj, sizeof(*pti)); | ||
192 | if (!pti) | ||
193 | return NULL; | ||
194 | pti->shared = pt; | ||
195 | kref_get(&pt->refcount); | ||
196 | return &pti->pt; | ||
197 | } | ||
198 | |||
199 | static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | ||
200 | { | ||
201 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
202 | struct gk20a_sync_timeline *obj = pt->obj; | ||
203 | struct sync_pt *pos; | ||
204 | bool signaled; | ||
205 | |||
206 | if (!pt->sema) | ||
207 | return true; | ||
208 | |||
209 | /* Acquired == not realeased yet == active == not signaled. */ | ||
210 | signaled = !gk20a_semaphore_is_acquired(pt->sema); | ||
211 | |||
212 | if (signaled) { | ||
213 | /* Update min if necessary. */ | ||
214 | if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh, | ||
215 | obj->min) == 1) | ||
216 | obj->min = pt->thresh; | ||
217 | |||
218 | /* Release the dependency fence, but get its timestamp | ||
219 | * first.*/ | ||
220 | if (pt->dep) { | ||
221 | s64 ns = 0; | ||
222 | struct list_head *dep_pts = &pt->dep->pt_list_head; | ||
223 | list_for_each_entry(pos, dep_pts, pt_list) { | ||
224 | ns = max(ns, ktime_to_ns(pos->timestamp)); | ||
225 | } | ||
226 | pt->dep_timestamp = ns_to_ktime(ns); | ||
227 | sync_fence_put(pt->dep); | ||
228 | pt->dep = NULL; | ||
229 | } | ||
230 | |||
231 | /* Release the semaphore to the pool. */ | ||
232 | gk20a_semaphore_put(pt->sema); | ||
233 | pt->sema = NULL; | ||
234 | } | ||
235 | return signaled; | ||
236 | } | ||
237 | |||
238 | static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt) | ||
239 | { | ||
240 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
241 | if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64) | ||
242 | return ns_to_ktime(0); | ||
243 | return ktime_sub(sync_pt->timestamp, pt->dep_timestamp); | ||
244 | } | ||
245 | |||
246 | static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) | ||
247 | { | ||
248 | bool a_expired; | ||
249 | bool b_expired; | ||
250 | struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a); | ||
251 | struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b); | ||
252 | |||
253 | if (WARN_ON(pt_a->obj != pt_b->obj)) | ||
254 | return 0; | ||
255 | |||
256 | /* Early out */ | ||
257 | if (a == b) | ||
258 | return 0; | ||
259 | |||
260 | a_expired = gk20a_sync_pt_has_signaled(a); | ||
261 | b_expired = gk20a_sync_pt_has_signaled(b); | ||
262 | if (a_expired && !b_expired) { | ||
263 | /* Easy, a was earlier */ | ||
264 | return -1; | ||
265 | } else if (!a_expired && b_expired) { | ||
266 | /* Easy, b was earlier */ | ||
267 | return 1; | ||
268 | } | ||
269 | |||
270 | /* Both a and b are expired (trigger before min) or not | ||
271 | * expired (trigger after min), so we can use min | ||
272 | * as a reference value for __gk20a_sync_pt_compare_ref. | ||
273 | */ | ||
274 | return __gk20a_sync_pt_compare_ref(pt_a->obj->min, | ||
275 | pt_a->thresh, pt_b->thresh); | ||
276 | } | ||
277 | |||
278 | static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj) | ||
279 | { | ||
280 | return obj->min; | ||
281 | } | ||
282 | |||
/*
 * timeline_value_str callback: formats the timeline's current value into
 * @str (at most @size bytes, as bounded by snprintf).
 *
 * The value is a u32 and is printed as unsigned, so values at or above
 * 2^31 do not show up as negative numbers.
 */
static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
					  char *str, int size)
{
	struct gk20a_sync_timeline *obj =
		(struct gk20a_sync_timeline *)timeline;

	snprintf(str, size, "%u", gk20a_sync_timeline_current(obj));
}
290 | |||
291 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | ||
292 | int size) | ||
293 | { | ||
294 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
295 | ktime_t dur = gk20a_sync_pt_duration(sync_pt); | ||
296 | if (pt->dep) { | ||
297 | snprintf(str, size, "(dep: [%p] %s) %d", | ||
298 | pt->dep, pt->dep->name, pt->thresh); | ||
299 | } else if (dur.tv64) { | ||
300 | struct timeval tv = ktime_to_timeval(dur); | ||
301 | snprintf(str, size, "(took %ld.%03ld ms) %d", | ||
302 | tv.tv_sec * 1000 + tv.tv_usec / 1000, | ||
303 | tv.tv_usec % 1000, | ||
304 | pt->thresh); | ||
305 | } else { | ||
306 | snprintf(str, size, "%d", pt->thresh); | ||
307 | } | ||
308 | } | ||
309 | |||
310 | static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt, | ||
311 | void *data, int size) | ||
312 | { | ||
313 | struct gk20a_sync_pt_info info; | ||
314 | |||
315 | if (size < sizeof(info)) | ||
316 | return -ENOMEM; | ||
317 | |||
318 | info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt)); | ||
319 | memcpy(data, &info, sizeof(info)); | ||
320 | |||
321 | return sizeof(info); | ||
322 | } | ||
323 | |||
324 | static const struct sync_timeline_ops gk20a_sync_timeline_ops = { | ||
325 | .driver_name = "gk20a_semaphore", | ||
326 | .dup = gk20a_sync_pt_dup_inst, | ||
327 | .has_signaled = gk20a_sync_pt_has_signaled, | ||
328 | .compare = gk20a_sync_pt_compare, | ||
329 | .free_pt = gk20a_sync_pt_free_inst, | ||
330 | .fill_driver_data = gk20a_sync_fill_driver_data, | ||
331 | .timeline_value_str = gk20a_sync_timeline_value_str, | ||
332 | .pt_value_str = gk20a_sync_pt_value_str, | ||
333 | }; | ||
334 | |||
335 | /* Public API */ | ||
336 | |||
/*
 * Looks up the sync fence behind file descriptor @fd by forwarding to
 * sync_fence_fdget() and returning its result unchanged.
 */
struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	struct sync_fence *fence = sync_fence_fdget(fd);

	return fence;
}
341 | |||
/*
 * Notifies the sync framework that @timeline may have advanced; simply
 * forwards to sync_timeline_signal().
 */
void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline);
}
346 | |||
/*
 * Tears down @timeline by forwarding to sync_timeline_destroy().
 */
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}
351 | |||
352 | struct sync_timeline *gk20a_sync_timeline_create( | ||
353 | const char *fmt, ...) | ||
354 | { | ||
355 | struct gk20a_sync_timeline *obj; | ||
356 | char name[30]; | ||
357 | va_list args; | ||
358 | |||
359 | va_start(args, fmt); | ||
360 | vsnprintf(name, sizeof(name), fmt, args); | ||
361 | va_end(args); | ||
362 | |||
363 | obj = (struct gk20a_sync_timeline *) | ||
364 | sync_timeline_create(&gk20a_sync_timeline_ops, | ||
365 | sizeof(struct gk20a_sync_timeline), | ||
366 | name); | ||
367 | if (!obj) | ||
368 | return NULL; | ||
369 | obj->max = 0; | ||
370 | obj->min = 0; | ||
371 | return &obj->obj; | ||
372 | } | ||
373 | |||
374 | int gk20a_sync_fence_create(struct sync_timeline *obj, | ||
375 | struct gk20a_semaphore *sema, | ||
376 | struct sync_fence *dependency, | ||
377 | const char *fmt, ...) | ||
378 | { | ||
379 | int fd; | ||
380 | char name[30]; | ||
381 | va_list args; | ||
382 | struct sync_pt *pt; | ||
383 | struct sync_fence *fence; | ||
384 | struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); | ||
385 | |||
386 | pt = gk20a_sync_pt_create_inst(timeline, sema, dependency); | ||
387 | if (pt == NULL) | ||
388 | return -ENOMEM; | ||
389 | |||
390 | va_start(args, fmt); | ||
391 | vsnprintf(name, sizeof(name), fmt, args); | ||
392 | va_end(args); | ||
393 | |||
394 | fence = sync_fence_create(name, pt); | ||
395 | if (fence == NULL) { | ||
396 | sync_pt_free(pt); | ||
397 | return -ENOMEM; | ||
398 | } | ||
399 | |||
400 | fd = get_unused_fd(); | ||
401 | if (fd < 0) { | ||
402 | sync_fence_put(fence); | ||
403 | return fd; | ||
404 | } | ||
405 | |||
406 | sync_fence_install(fence, fd); | ||
407 | return fd; | ||
408 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h new file mode 100644 index 00000000..49597f06 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * drivers/gpu/nvgpu/gk20a/sync_gk20a.h | ||
3 | * | ||
4 | * GK20A Sync Framework Integration | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #ifndef _GK20A_SYNC_H_ | ||
19 | #define _GK20A_SYNC_H_ | ||
20 | |||
21 | #include <linux/types.h> | ||
22 | |||
/* Opaque to users of this header: only pointers are passed around. */
struct sync_timeline;
struct sync_fence;
struct sync_pt;
struct gk20a_semaphore;

#ifdef CONFIG_SYNC
/* Create a timeline named by the printf-style @fmt; NULL on failure. */
struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...);
/* Destroy a timeline. */
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline);
/* Tell the sync framework that the timeline may have advanced. */
void gk20a_sync_timeline_signal(struct sync_timeline *timeline);
/*
 * Create a fence with one new sync point backed by @sema on @timeline.
 * Returns a new file descriptor, or a negative error code.
 */
int gk20a_sync_fence_create(struct sync_timeline *timeline,
		struct gk20a_semaphore *sema,
		struct sync_fence *dependency,
		const char *fmt, ...);
/* Look up the sync fence behind @fd. */
struct sync_fence *gk20a_sync_fence_fdget(int fd);
#else
/*
 * Without CONFIG_SYNC only these no-op stubs exist. There are no stubs for
 * gk20a_sync_timeline_create() or gk20a_sync_fence_create(), so callers of
 * those two must be compiled under CONFIG_SYNC themselves.
 */
static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj)
{
}

static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj)
{
}

static inline struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	return NULL;
}
#endif
45 | |||
46 | #endif | ||