diff options
author | Debarshi Dutta <ddutta@nvidia.com> | 2018-05-10 23:48:48 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-05-14 09:07:12 -0400 |
commit | a51eb9da021c2934e196c5d8be04551703e6bb5b (patch) | |
tree | 843036626eff3733759ab003fc577a1ae2e729ae /drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |
parent | 4dac924aba27aa46267fb39f3ed968318292a7f5 (diff) |
gpu: nvgpu: move sync_gk20a under common/linux directory
sync_gk20a.* files are no longer used by core code and only invoked
from linux specific implementations of the OS_FENCE framework which are
under the common/linux directory. Hence, sync_gk20a.* files are also
moved under common/linux.
JIRA NVGPU-66
Change-Id: If623524611373d2da39b63cfb3c1e40089bf8d22
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1712900
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 425 |
1 file changed, 0 insertions, 425 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c deleted file mode 100644 index 56c90da7..00000000 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ /dev/null | |||
@@ -1,425 +0,0 @@ | |||
1 | /* | ||
2 | * GK20A Sync Framework Integration | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/file.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <linux/hrtimer.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <nvgpu/lock.h> | ||
30 | |||
31 | #include <nvgpu/kmem.h> | ||
32 | #include <nvgpu/semaphore.h> | ||
33 | #include <nvgpu/bug.h> | ||
34 | #include <nvgpu/kref.h> | ||
35 | #include "../common/linux/channel.h" | ||
36 | |||
37 | #include "../drivers/staging/android/sync.h" | ||
38 | |||
39 | #include "sync_gk20a.h" | ||
40 | |||
41 | static const struct sync_timeline_ops gk20a_sync_timeline_ops; | ||
42 | |||
/*
 * Timeline backing object; embeds the framework's sync_timeline so the
 * two can be cast back and forth (see to_gk20a_timeline()).
 */
struct gk20a_sync_timeline {
	struct sync_timeline obj;
	u32 max;	/* threshold most recently issued (++'d in pt_create_shared) */
	u32 min;	/* threshold most recently seen signaled (has_signaled) */
};
48 | |||
/**
 * The sync framework dups pts when merging fences. We share a single
 * refcounted gk20a_sync_pt for each duped pt.
 */
struct gk20a_sync_pt {
	struct gk20a *g;
	struct nvgpu_ref refcount;	/* shared by all duped instances */
	u32 thresh;			/* timeline value at which this pt signals */
	struct nvgpu_semaphore *sema;	/* set to NULL once signaled, under lock */
	struct gk20a_sync_timeline *obj;	/* parent timeline */

	/*
	 * Use a spin lock here since it will have better performance
	 * than a mutex - there should be very little contention on this
	 * lock.
	 */
	struct nvgpu_spinlock lock;	/* protects sema */
};
67 | |||
/*
 * Per-instance wrapper allocated by sync_pt_create(); the framework's
 * sync_pt is embedded and container_of() recovers this struct from it.
 */
struct gk20a_sync_pt_inst {
	struct sync_pt pt;
	struct gk20a_sync_pt *shared;	/* refcounted state shared across dups */
};
72 | |||
73 | /** | ||
74 | * Compares sync pt values a and b, both of which will trigger either before | ||
75 | * or after ref (i.e. a and b trigger before ref, or a and b trigger after | ||
76 | * ref). Supplying ref allows us to handle wrapping correctly. | ||
77 | * | ||
78 | * Returns -1 if a < b (a triggers before b) | ||
79 | * 0 if a = b (a and b trigger at the same time) | ||
80 | * 1 if a > b (b triggers before a) | ||
81 | */ | ||
82 | static int __gk20a_sync_pt_compare_ref( | ||
83 | u32 ref, | ||
84 | u32 a, | ||
85 | u32 b) | ||
86 | { | ||
87 | /* | ||
88 | * We normalize both a and b by subtracting ref from them. | ||
89 | * Denote the normalized values by a_n and b_n. Note that because | ||
90 | * of wrapping, a_n and/or b_n may be negative. | ||
91 | * | ||
92 | * The normalized values a_n and b_n satisfy: | ||
93 | * - a positive value triggers before a negative value | ||
94 | * - a smaller positive value triggers before a greater positive value | ||
95 | * - a smaller negative value (greater in absolute value) triggers | ||
96 | * before a greater negative value (smaller in absolute value). | ||
97 | * | ||
98 | * Thus we can just stick to unsigned arithmetic and compare | ||
99 | * (u32)a_n to (u32)b_n. | ||
100 | * | ||
101 | * Just to reiterate the possible cases: | ||
102 | * | ||
103 | * 1A) ...ref..a....b.... | ||
104 | * 1B) ...ref..b....a.... | ||
105 | * 2A) ...b....ref..a.... b_n < 0 | ||
106 | * 2B) ...a....ref..b.... a_n > 0 | ||
107 | * 3A) ...a....b....ref.. a_n < 0, b_n < 0 | ||
108 | * 3A) ...b....a....ref.. a_n < 0, b_n < 0 | ||
109 | */ | ||
110 | u32 a_n = a - ref; | ||
111 | u32 b_n = b - ref; | ||
112 | if (a_n < b_n) | ||
113 | return -1; | ||
114 | else if (a_n > b_n) | ||
115 | return 1; | ||
116 | else | ||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt) | ||
121 | { | ||
122 | struct gk20a_sync_pt_inst *pti = | ||
123 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
124 | return pti->shared; | ||
125 | } | ||
126 | static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj) | ||
127 | { | ||
128 | if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops)) | ||
129 | return NULL; | ||
130 | return (struct gk20a_sync_timeline *)obj; | ||
131 | } | ||
132 | |||
133 | static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) | ||
134 | { | ||
135 | struct gk20a_sync_pt *pt = | ||
136 | container_of(ref, struct gk20a_sync_pt, refcount); | ||
137 | struct gk20a *g = pt->g; | ||
138 | |||
139 | if (pt->sema) | ||
140 | nvgpu_semaphore_put(pt->sema); | ||
141 | nvgpu_kfree(g, pt); | ||
142 | } | ||
143 | |||
144 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | ||
145 | struct gk20a *g, | ||
146 | struct gk20a_sync_timeline *obj, | ||
147 | struct nvgpu_semaphore *sema) | ||
148 | { | ||
149 | struct gk20a_sync_pt *shared; | ||
150 | |||
151 | shared = nvgpu_kzalloc(g, sizeof(*shared)); | ||
152 | if (!shared) | ||
153 | return NULL; | ||
154 | |||
155 | nvgpu_ref_init(&shared->refcount); | ||
156 | shared->g = g; | ||
157 | shared->obj = obj; | ||
158 | shared->sema = sema; | ||
159 | shared->thresh = ++obj->max; /* sync framework has a lock */ | ||
160 | |||
161 | nvgpu_spinlock_init(&shared->lock); | ||
162 | |||
163 | nvgpu_semaphore_get(sema); | ||
164 | |||
165 | return shared; | ||
166 | } | ||
167 | |||
168 | static struct sync_pt *gk20a_sync_pt_create_inst( | ||
169 | struct gk20a *g, | ||
170 | struct gk20a_sync_timeline *obj, | ||
171 | struct nvgpu_semaphore *sema) | ||
172 | { | ||
173 | struct gk20a_sync_pt_inst *pti; | ||
174 | |||
175 | pti = (struct gk20a_sync_pt_inst *) | ||
176 | sync_pt_create(&obj->obj, sizeof(*pti)); | ||
177 | if (!pti) | ||
178 | return NULL; | ||
179 | |||
180 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); | ||
181 | if (!pti->shared) { | ||
182 | sync_pt_free(&pti->pt); | ||
183 | return NULL; | ||
184 | } | ||
185 | return &pti->pt; | ||
186 | } | ||
187 | |||
188 | static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) | ||
189 | { | ||
190 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
191 | if (pt) | ||
192 | nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); | ||
193 | } | ||
194 | |||
195 | static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) | ||
196 | { | ||
197 | struct gk20a_sync_pt_inst *pti; | ||
198 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
199 | |||
200 | pti = (struct gk20a_sync_pt_inst *) | ||
201 | sync_pt_create(&pt->obj->obj, sizeof(*pti)); | ||
202 | if (!pti) | ||
203 | return NULL; | ||
204 | pti->shared = pt; | ||
205 | nvgpu_ref_get(&pt->refcount); | ||
206 | return &pti->pt; | ||
207 | } | ||
208 | |||
/*
 * This function must be able to run on the same sync_pt concurrently. This
 * requires a lock to protect access to the sync_pt's internal data structures
 * which are modified as a side effect of calling this function.
 *
 * Returns nonzero once the underlying semaphore has been released; from
 * then on the pt stays signaled (sema is dropped and set to NULL).
 */
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	struct gk20a_sync_timeline *obj = pt->obj;
	bool signaled = true;

	nvgpu_spinlock_acquire(&pt->lock);
	/* A NULL sema means this pt already signaled on an earlier call. */
	if (!pt->sema)
		goto done;

	/* Acquired == not released yet == active == not signaled. */
	signaled = !nvgpu_semaphore_is_acquired(pt->sema);

	if (signaled) {
		/* Update min if necessary. */
		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
				obj->min) == 1)
			obj->min = pt->thresh;

		/* Release the semaphore to the pool. */
		nvgpu_semaphore_put(pt->sema);
		pt->sema = NULL;
	}
done:
	nvgpu_spinlock_release(&pt->lock);

	return signaled;
}
242 | |||
/*
 * Framework compare callback. Returns -1 if a triggers before b, 1 if b
 * triggers before a, and 0 on a tie (or, with a warning, when the two
 * pts do not belong to the same timeline).
 */
static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	/* Note: has_signaled() also updates obj->min as a side effect. */
	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/* Both a and b are expired (trigger before min) or not
	 * expired (trigger after min), so we can use min
	 * as a reference value for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}
274 | |||
/* Current value of the timeline: the most recently signaled threshold. */
static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}
279 | |||
280 | static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, | ||
281 | char *str, int size) | ||
282 | { | ||
283 | struct gk20a_sync_timeline *obj = | ||
284 | (struct gk20a_sync_timeline *)timeline; | ||
285 | snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); | ||
286 | } | ||
287 | |||
/*
 * Render the state of a semaphore-backed pt into str; the caller must
 * guarantee pt->sema is non-NULL.
 *
 * NOTE(review): pt->sema is read here without pt->lock, while
 * gk20a_sync_pt_has_signaled() clears it under the lock - confirm this
 * debug path cannot race with signaling.
 */
static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
		char *str, int size)
{
	struct nvgpu_semaphore *s = pt->sema;

	snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
		s->location.pool->page_idx,
		nvgpu_semaphore_get_value(s),
		nvgpu_semaphore_read(s));
}
298 | |||
299 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | ||
300 | int size) | ||
301 | { | ||
302 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
303 | |||
304 | if (pt->sema) { | ||
305 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | ||
306 | return; | ||
307 | } | ||
308 | |||
309 | snprintf(str, size, "%d", pt->thresh); | ||
310 | } | ||
311 | |||
/* Callback table handed to the Android sync framework for gk20a timelines. */
static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};
321 | |||
322 | /* Public API */ | ||
323 | |||
324 | struct sync_fence *gk20a_sync_fence_fdget(int fd) | ||
325 | { | ||
326 | struct sync_fence *fence = sync_fence_fdget(fd); | ||
327 | int i; | ||
328 | |||
329 | if (!fence) | ||
330 | return NULL; | ||
331 | |||
332 | for (i = 0; i < fence->num_fences; i++) { | ||
333 | struct fence *pt = fence->cbs[i].sync_pt; | ||
334 | struct sync_pt *spt = sync_pt_from_fence(pt); | ||
335 | struct sync_timeline *t; | ||
336 | |||
337 | if (spt == NULL) { | ||
338 | sync_fence_put(fence); | ||
339 | return NULL; | ||
340 | } | ||
341 | |||
342 | t = sync_pt_parent(spt); | ||
343 | if (t->ops != &gk20a_sync_timeline_ops) { | ||
344 | sync_fence_put(fence); | ||
345 | return NULL; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | return fence; | ||
350 | } | ||
351 | |||
352 | struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) | ||
353 | { | ||
354 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); | ||
355 | struct nvgpu_semaphore *sema; | ||
356 | |||
357 | nvgpu_spinlock_acquire(&pt->lock); | ||
358 | sema = pt->sema; | ||
359 | if (sema) | ||
360 | nvgpu_semaphore_get(sema); | ||
361 | nvgpu_spinlock_release(&pt->lock); | ||
362 | |||
363 | return sema; | ||
364 | } | ||
365 | |||
/* Thin wrapper: forward to sync_timeline_signal() with a zero timestamp. */
void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}
370 | |||
/* Thin wrapper: forward to the framework's sync_timeline_destroy(). */
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}
375 | |||
376 | struct sync_timeline *gk20a_sync_timeline_create( | ||
377 | const char *name) | ||
378 | { | ||
379 | struct gk20a_sync_timeline *obj; | ||
380 | |||
381 | obj = (struct gk20a_sync_timeline *) | ||
382 | sync_timeline_create(&gk20a_sync_timeline_ops, | ||
383 | sizeof(struct gk20a_sync_timeline), | ||
384 | name); | ||
385 | if (!obj) | ||
386 | return NULL; | ||
387 | obj->max = 0; | ||
388 | obj->min = 0; | ||
389 | return &obj->obj; | ||
390 | } | ||
391 | |||
392 | struct sync_fence *gk20a_sync_fence_create( | ||
393 | struct channel_gk20a *c, | ||
394 | struct nvgpu_semaphore *sema, | ||
395 | const char *fmt, ...) | ||
396 | { | ||
397 | char name[30]; | ||
398 | va_list args; | ||
399 | struct sync_pt *pt; | ||
400 | struct sync_fence *fence; | ||
401 | struct gk20a *g = c->g; | ||
402 | |||
403 | struct nvgpu_channel_linux *os_channel_priv = c->os_priv; | ||
404 | struct nvgpu_os_fence_framework *fence_framework = NULL; | ||
405 | struct gk20a_sync_timeline *timeline = NULL; | ||
406 | |||
407 | fence_framework = &os_channel_priv->fence_framework; | ||
408 | |||
409 | timeline = to_gk20a_timeline(fence_framework->timeline); | ||
410 | |||
411 | pt = gk20a_sync_pt_create_inst(g, timeline, sema); | ||
412 | if (pt == NULL) | ||
413 | return NULL; | ||
414 | |||
415 | va_start(args, fmt); | ||
416 | vsnprintf(name, sizeof(name), fmt, args); | ||
417 | va_end(args); | ||
418 | |||
419 | fence = sync_fence_create(name, pt); | ||
420 | if (fence == NULL) { | ||
421 | sync_pt_free(pt); | ||
422 | return NULL; | ||
423 | } | ||
424 | return fence; | ||
425 | } | ||