author	Debarshi Dutta <ddutta@nvidia.com>	2018-05-10 23:48:48 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-05-14 09:07:12 -0400
commit	a51eb9da021c2934e196c5d8be04551703e6bb5b (patch)
tree	843036626eff3733759ab003fc577a1ae2e729ae /drivers/gpu/nvgpu/gk20a/sync_gk20a.c
parent	4dac924aba27aa46267fb39f3ed968318292a7f5 (diff)
gpu: nvgpu: move sync_gk20a under common/linux directory

sync_gk20a.* files are no longer used by core code and are only invoked
from the Linux-specific implementations of the OS_FENCE framework, which
live under the common/linux directory. Hence, the sync_gk20a.* files are
moved under common/linux as well.

JIRA NVGPU-66

Change-Id: If623524611373d2da39b63cfb3c1e40089bf8d22
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1712900
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.c  425
1 file changed, 0 insertions(+), 425 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
deleted file mode 100644
index 56c90da7..00000000
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ /dev/null
@@ -1,425 +0,0 @@
/*
 * GK20A Sync Framework Integration
 *
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <nvgpu/lock.h>

#include <nvgpu/kmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>
#include "../common/linux/channel.h"

#include "../drivers/staging/android/sync.h"

#include "sync_gk20a.h"

static const struct sync_timeline_ops gk20a_sync_timeline_ops;

struct gk20a_sync_timeline {
	struct sync_timeline obj;
	u32 max;
	u32 min;
};

/**
 * The sync framework dups pts when merging fences. We share a single
 * refcounted gk20a_sync_pt for each duped pt.
 */
struct gk20a_sync_pt {
	struct gk20a *g;
	struct nvgpu_ref refcount;
	u32 thresh;
	struct nvgpu_semaphore *sema;
	struct gk20a_sync_timeline *obj;

	/*
	 * Use a spin lock here since it will have better performance
	 * than a mutex - there should be very little contention on this
	 * lock.
	 */
	struct nvgpu_spinlock lock;
};

struct gk20a_sync_pt_inst {
	struct sync_pt pt;
	struct gk20a_sync_pt *shared;
};

/**
 * Compares sync pt values a and b, both of which will trigger either before
 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
 * ref). Supplying ref allows us to handle wrapping correctly.
 *
 * Returns -1 if a < b (a triggers before b)
 *          0 if a = b (a and b trigger at the same time)
 *          1 if a > b (b triggers before a)
 */
static int __gk20a_sync_pt_compare_ref(
	u32 ref,
	u32 a,
	u32 b)
{
	/*
	 * We normalize both a and b by subtracting ref from them.
	 * Denote the normalized values by a_n and b_n. Note that because
	 * of wrapping, a_n and/or b_n may be negative.
	 *
	 * The normalized values a_n and b_n satisfy:
	 * - a positive value triggers before a negative value
	 * - a smaller positive value triggers before a greater positive value
	 * - a smaller negative value (greater in absolute value) triggers
	 *   before a greater negative value (smaller in absolute value).
	 *
	 * Thus we can just stick to unsigned arithmetic and compare
	 * (u32)a_n to (u32)b_n.
	 *
	 * Just to reiterate the possible cases:
	 *
	 * 1A) ...ref..a....b....
	 * 1B) ...ref..b....a....
	 * 2A) ...b....ref..a.... b_n < 0
	 * 2B) ...a....ref..b.... a_n < 0
	 * 3A) ...a....b....ref.. a_n < 0, b_n < 0
	 * 3B) ...b....a....ref.. a_n < 0, b_n < 0
	 */
	u32 a_n = a - ref;
	u32 b_n = b - ref;
	if (a_n < b_n)
		return -1;
	else if (a_n > b_n)
		return 1;
	else
		return 0;
}
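
/*
 * Illustrative note (added for this review, not from the original file):
 * a worked example of the wrap-safe compare above. With ref = 0xfffffff0,
 * a = 0xfffffff8 and b = 0x00000008, a raw unsigned compare of a and b
 * would claim b triggers first. Normalizing gives a_n = 0x8 and
 * b_n = 0x18, so a_n < b_n and the function returns -1: a triggers
 * before b despite the 32-bit wrap.
 */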

static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
{
	struct gk20a_sync_pt_inst *pti =
		container_of(pt, struct gk20a_sync_pt_inst, pt);
	return pti->shared;
}
static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
{
	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
		return NULL;
	return (struct gk20a_sync_timeline *)obj;
}

static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
{
	struct gk20a_sync_pt *pt =
		container_of(ref, struct gk20a_sync_pt, refcount);
	struct gk20a *g = pt->g;

	if (pt->sema)
		nvgpu_semaphore_put(pt->sema);
	nvgpu_kfree(g, pt);
}

static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt *shared;

	shared = nvgpu_kzalloc(g, sizeof(*shared));
	if (!shared)
		return NULL;

	nvgpu_ref_init(&shared->refcount);
	shared->g = g;
	shared->obj = obj;
	shared->sema = sema;
	shared->thresh = ++obj->max; /* sync framework has a lock */

	nvgpu_spinlock_init(&shared->lock);

	nvgpu_semaphore_get(sema);

	return shared;
}

static struct sync_pt *gk20a_sync_pt_create_inst(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt_inst *pti;

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;

	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
	if (!pti->shared) {
		sync_pt_free(&pti->pt);
		return NULL;
	}
	return &pti->pt;
}

static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	if (pt)
		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
}

static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt_inst *pti;
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&pt->obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;
	pti->shared = pt;
	nvgpu_ref_get(&pt->refcount);
	return &pti->pt;
}
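
/*
 * Illustrative sketch (added note, not from the original file): how the
 * shared pt's lifetime plays out when the sync framework merges fences.
 * After
 *
 *	struct sync_pt *dup = gk20a_sync_pt_dup_inst(orig);
 *
 * orig and dup share one gk20a_sync_pt and its refcount is 2. Each
 * gk20a_sync_pt_free_inst() drops one reference; the final put invokes
 * gk20a_sync_pt_free_shared(), which releases the semaphore and frees
 * the shared structure.
 */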

/*
 * This function must be able to run on the same sync_pt concurrently. This
 * requires a lock to protect access to the sync_pt's internal data structures
 * which are modified as a side effect of calling this function.
 */
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	struct gk20a_sync_timeline *obj = pt->obj;
	bool signaled = true;

	nvgpu_spinlock_acquire(&pt->lock);
	if (!pt->sema)
		goto done;

	/* Acquired == not released yet == active == not signaled. */
	signaled = !nvgpu_semaphore_is_acquired(pt->sema);

	if (signaled) {
		/* Update min if necessary. */
		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
				obj->min) == 1)
			obj->min = pt->thresh;

		/* Release the semaphore to the pool. */
		nvgpu_semaphore_put(pt->sema);
		pt->sema = NULL;
	}
done:
	nvgpu_spinlock_release(&pt->lock);

	return signaled;
}

static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/* Both a and b are expired (trigger before min) or not
	 * expired (trigger after min), so we can use min
	 * as a reference value for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}
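
/*
 * Illustrative note (added, hypothetical values): with obj->min = 10 and
 * neither point expired, thresholds 12 and 14 reduce to
 * __gk20a_sync_pt_compare_ref(10, 12, 14), which compares the normalized
 * values 2 and 4 and returns -1, i.e. the first point signals earlier.
 */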

static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}

static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *obj =
		(struct gk20a_sync_timeline *)timeline;
	snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
}

static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
		char *str, int size)
{
	struct nvgpu_semaphore *s = pt->sema;

	snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
		s->location.pool->page_idx,
		nvgpu_semaphore_get_value(s),
		nvgpu_semaphore_read(s));
}

static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
		int size)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	if (pt->sema) {
		gk20a_sync_pt_value_str_for_sema(pt, str, size);
		return;
	}

	snprintf(str, size, "%d", pt->thresh);
}

static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};

/* Public API */

struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	struct sync_fence *fence = sync_fence_fdget(fd);
	int i;

	if (!fence)
		return NULL;

	for (i = 0; i < fence->num_fences; i++) {
		struct fence *pt = fence->cbs[i].sync_pt;
		struct sync_pt *spt = sync_pt_from_fence(pt);
		struct sync_timeline *t;

		if (spt == NULL) {
			sync_fence_put(fence);
			return NULL;
		}

		t = sync_pt_parent(spt);
		if (t->ops != &gk20a_sync_timeline_ops) {
			sync_fence_put(fence);
			return NULL;
		}
	}

	return fence;
}

struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
	struct nvgpu_semaphore *sema;

	nvgpu_spinlock_acquire(&pt->lock);
	sema = pt->sema;
	if (sema)
		nvgpu_semaphore_get(sema);
	nvgpu_spinlock_release(&pt->lock);

	return sema;
}

void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}

void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}

struct sync_timeline *gk20a_sync_timeline_create(
		const char *name)
{
	struct gk20a_sync_timeline *obj;

	obj = (struct gk20a_sync_timeline *)
		sync_timeline_create(&gk20a_sync_timeline_ops,
				sizeof(struct gk20a_sync_timeline),
				name);
	if (!obj)
		return NULL;
	obj->max = 0;
	obj->min = 0;
	return &obj->obj;
}

struct sync_fence *gk20a_sync_fence_create(
		struct channel_gk20a *c,
		struct nvgpu_semaphore *sema,
		const char *fmt, ...)
{
	char name[30];
	va_list args;
	struct sync_pt *pt;
	struct sync_fence *fence;
	struct gk20a *g = c->g;

	struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
	struct nvgpu_os_fence_framework *fence_framework = NULL;
	struct gk20a_sync_timeline *timeline = NULL;

	fence_framework = &os_channel_priv->fence_framework;

	timeline = to_gk20a_timeline(fence_framework->timeline);

	pt = gk20a_sync_pt_create_inst(g, timeline, sema);
	if (pt == NULL)
		return NULL;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		sync_pt_free(pt);
		return NULL;
	}
	return fence;
}
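
A minimal usage sketch of the public API above (added for illustration; the
ch, sema, timeline and job_id names are hypothetical placeholders, not
identifiers from the nvgpu tree):

	/* create a fence backed by a semaphore on the channel's timeline */
	struct sync_fence *f = gk20a_sync_fence_create(ch, sema,
						"job-%d", job_id);
	if (f == NULL)
		return -ENOMEM;

	/*
	 * when the semaphore is later released, signal the timeline so
	 * expired points are collected
	 */
	gk20a_sync_timeline_signal(timeline);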