author     Debarshi Dutta <ddutta@nvidia.com>                    2018-05-10 23:48:48 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2018-05-14 09:07:12 -0400
commit     a51eb9da021c2934e196c5d8be04551703e6bb5b (patch)
tree       843036626eff3733759ab003fc577a1ae2e729ae /drivers/gpu/nvgpu/common
parent     4dac924aba27aa46267fb39f3ed968318292a7f5 (diff)
gpu: nvgpu: move sync_gk20a under common/linux directory
The sync_gk20a.* files are no longer used by core code and are only invoked from
the Linux-specific implementations of the OS_FENCE framework, which live under
the common/linux directory. Hence, the sync_gk20a.* files are also moved under
common/linux.

JIRA NVGPU-66

Change-Id: If623524611373d2da39b63cfb3c1e40089bf8d22
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1712900
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/channel.c                    2
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c      3
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c    1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sync_sema_android.c        419
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sync_sema_android.h         51
5 files changed, 473 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index d767374b..1e170b30 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -40,7 +40,7 @@
 #include <trace/events/gk20a.h>
 #include <uapi/linux/nvgpu.h>
 
-#include "gk20a/sync_gk20a.h"
+#include "sync_sema_android.h"
 
 u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
 {
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
index d4aeb6ed..2bb71c99 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
@@ -21,10 +21,11 @@
 #include <nvgpu/linux/os_fence_android.h>
 #include <nvgpu/semaphore.h>
 
-#include "gk20a/sync_gk20a.h"
 #include "gk20a/channel_sync_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+#include "sync_sema_android.h"
+
 #include "../drivers/staging/android/sync.h"
 
 int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
index b3712011..d7a72fcd 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
@@ -24,7 +24,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/channel_gk20a.h"
-#include "gk20a/sync_gk20a.h"
 #include "gk20a/channel_sync_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c
new file mode 100644
index 00000000..fad21351
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c
@@ -0,0 +1,419 @@
/*
 * Semaphore Sync Framework Integration
 *
 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <nvgpu/lock.h>

#include <nvgpu/kmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>
#include "../common/linux/channel.h"

#include "../drivers/staging/android/sync.h"

#include "sync_sema_android.h"

static const struct sync_timeline_ops gk20a_sync_timeline_ops;

struct gk20a_sync_timeline {
	struct sync_timeline obj;
	u32 max;
	u32 min;
};

/**
 * The sync framework dups pts when merging fences. We share a single
 * refcounted gk20a_sync_pt for each duped pt.
 */
struct gk20a_sync_pt {
	struct gk20a *g;
	struct nvgpu_ref refcount;
	u32 thresh;
	struct nvgpu_semaphore *sema;
	struct gk20a_sync_timeline *obj;

	/*
	 * Use a spin lock here since it will have better performance
	 * than a mutex - there should be very little contention on this
	 * lock.
	 */
	struct nvgpu_spinlock lock;
};

struct gk20a_sync_pt_inst {
	struct sync_pt pt;
	struct gk20a_sync_pt *shared;
};

/**
 * Compares sync pt values a and b, both of which will trigger either before
 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
 * ref). Supplying ref allows us to handle wrapping correctly.
 *
 * Returns -1 if a < b (a triggers before b)
 *          0 if a = b (a and b trigger at the same time)
 *          1 if a > b (b triggers before a)
 */
static int __gk20a_sync_pt_compare_ref(
	u32 ref,
	u32 a,
	u32 b)
{
	/*
	 * We normalize both a and b by subtracting ref from them.
	 * Denote the normalized values by a_n and b_n. Note that because
	 * of wrapping, a_n and/or b_n may be negative.
	 *
	 * The normalized values a_n and b_n satisfy:
	 * - a positive value triggers before a negative value
	 * - a smaller positive value triggers before a greater positive value
	 * - a smaller negative value (greater in absolute value) triggers
	 *   before a greater negative value (smaller in absolute value).
	 *
	 * Thus we can just stick to unsigned arithmetic and compare
	 * (u32)a_n to (u32)b_n.
	 *
	 * Just to reiterate the possible cases:
	 *
	 *	1A) ...ref..a....b....
	 *	1B) ...ref..b....a....
	 *	2A) ...b....ref..a....		b_n < 0
	 *	2B) ...a....ref..b....		a_n > 0
	 *	3A) ...a....b....ref..		a_n < 0, b_n < 0
	 *	3B) ...b....a....ref..		a_n < 0, b_n < 0
	 */
	u32 a_n = a - ref;
	u32 b_n = b - ref;
	if (a_n < b_n)
		return -1;
	else if (a_n > b_n)
		return 1;
	else
		return 0;
}

static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
{
	struct gk20a_sync_pt_inst *pti =
		container_of(pt, struct gk20a_sync_pt_inst, pt);
	return pti->shared;
}
static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
{
	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
		return NULL;
	return (struct gk20a_sync_timeline *)obj;
}

static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
{
	struct gk20a_sync_pt *pt =
		container_of(ref, struct gk20a_sync_pt, refcount);
	struct gk20a *g = pt->g;

	if (pt->sema)
		nvgpu_semaphore_put(pt->sema);
	nvgpu_kfree(g, pt);
}

static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt *shared;

	shared = nvgpu_kzalloc(g, sizeof(*shared));
	if (!shared)
		return NULL;

	nvgpu_ref_init(&shared->refcount);
	shared->g = g;
	shared->obj = obj;
	shared->sema = sema;
	shared->thresh = ++obj->max; /* sync framework has a lock */

	nvgpu_spinlock_init(&shared->lock);

	nvgpu_semaphore_get(sema);

	return shared;
}

static struct sync_pt *gk20a_sync_pt_create_inst(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt_inst *pti;

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;

	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
	if (!pti->shared) {
		sync_pt_free(&pti->pt);
		return NULL;
	}
	return &pti->pt;
}

static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	if (pt)
		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
}

static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt_inst *pti;
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&pt->obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;
	pti->shared = pt;
	nvgpu_ref_get(&pt->refcount);
	return &pti->pt;
}

/*
 * This function must be able to run on the same sync_pt concurrently. This
 * requires a lock to protect access to the sync_pt's internal data structures
 * which are modified as a side effect of calling this function.
 */
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	struct gk20a_sync_timeline *obj = pt->obj;
	bool signaled = true;

	nvgpu_spinlock_acquire(&pt->lock);
	if (!pt->sema)
		goto done;

	/* Acquired == not released yet == active == not signaled. */
	signaled = !nvgpu_semaphore_is_acquired(pt->sema);

	if (signaled) {
		/* Update min if necessary. */
		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
				obj->min) == 1)
			obj->min = pt->thresh;

		/* Release the semaphore to the pool. */
		nvgpu_semaphore_put(pt->sema);
		pt->sema = NULL;
	}
done:
	nvgpu_spinlock_release(&pt->lock);

	return signaled;
}

static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/* Both a and b are expired (trigger before min) or not
	 * expired (trigger after min), so we can use min
	 * as a reference value for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}

static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}

static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *obj =
		(struct gk20a_sync_timeline *)timeline;
	snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
}

static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
		char *str, int size)
{
	struct nvgpu_semaphore *s = pt->sema;

	snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
		s->location.pool->page_idx,
		nvgpu_semaphore_get_value(s),
		nvgpu_semaphore_read(s));
}

static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
		int size)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	if (pt->sema) {
		gk20a_sync_pt_value_str_for_sema(pt, str, size);
		return;
	}

	snprintf(str, size, "%d", pt->thresh);
}

static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};

/* Public API */

struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	struct sync_fence *fence = sync_fence_fdget(fd);
	int i;

	if (!fence)
		return NULL;

	for (i = 0; i < fence->num_fences; i++) {
		struct fence *pt = fence->cbs[i].sync_pt;
		struct sync_pt *spt = sync_pt_from_fence(pt);
		struct sync_timeline *t;

		if (spt == NULL) {
			sync_fence_put(fence);
			return NULL;
		}

		t = sync_pt_parent(spt);
		if (t->ops != &gk20a_sync_timeline_ops) {
			sync_fence_put(fence);
			return NULL;
		}
	}

	return fence;
}

struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
	struct nvgpu_semaphore *sema;

	nvgpu_spinlock_acquire(&pt->lock);
	sema = pt->sema;
	if (sema)
		nvgpu_semaphore_get(sema);
	nvgpu_spinlock_release(&pt->lock);

	return sema;
}

void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}

void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}

struct sync_timeline *gk20a_sync_timeline_create(
		const char *name)
{
	struct gk20a_sync_timeline *obj;

	obj = (struct gk20a_sync_timeline *)
		sync_timeline_create(&gk20a_sync_timeline_ops,
				sizeof(struct gk20a_sync_timeline),
				name);
	if (!obj)
		return NULL;
	obj->max = 0;
	obj->min = 0;
	return &obj->obj;
}

struct sync_fence *gk20a_sync_fence_create(
	struct channel_gk20a *c,
	struct nvgpu_semaphore *sema,
	const char *fmt, ...)
{
	char name[30];
	va_list args;
	struct sync_pt *pt;
	struct sync_fence *fence;
	struct gk20a *g = c->g;

	struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
	struct nvgpu_os_fence_framework *fence_framework = NULL;
	struct gk20a_sync_timeline *timeline = NULL;

	fence_framework = &os_channel_priv->fence_framework;

	timeline = to_gk20a_timeline(fence_framework->timeline);

	pt = gk20a_sync_pt_create_inst(g, timeline, sema);
	if (pt == NULL)
		return NULL;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		sync_pt_free(pt);
		return NULL;
	}
	return fence;
}
diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h
new file mode 100644
index 00000000..4fca7bed
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h
@@ -0,0 +1,51 @@
/*
 * Semaphore Sync Framework Integration
 *
 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _GK20A_SYNC_H_
#define _GK20A_SYNC_H_

struct sync_timeline;
struct sync_fence;
struct sync_pt;
struct nvgpu_semaphore;
struct fence;

#ifdef CONFIG_SYNC
struct sync_timeline *gk20a_sync_timeline_create(const char *name);
void gk20a_sync_timeline_destroy(struct sync_timeline *);
void gk20a_sync_timeline_signal(struct sync_timeline *);
struct sync_fence *gk20a_sync_fence_create(
		struct channel_gk20a *c,
		struct nvgpu_semaphore *,
		const char *fmt, ...);
struct sync_fence *gk20a_sync_fence_fdget(int fd);
struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt);
#else
static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {}
static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {}
static inline struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	return NULL;
}
static inline struct sync_timeline *gk20a_sync_timeline_create(
		const char *name) {
	return NULL;
}
#endif

#endif
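
Editor's note: for orientation, below is a minimal sketch of how the public API
declared in this header hangs together. It is an illustration, not part of the
commit, and is not meant to compile stand-alone outside the kernel tree; it
assumes CONFIG_SYNC is enabled and that the hypothetical 'ch', 'sema' and
'user_fd' arguments come from the surrounding nvgpu submit path.

/* Hypothetical helper, for illustration only. */
static void example_semaphore_fence_flow(struct channel_gk20a *ch,
		struct nvgpu_semaphore *sema, int user_fd)
{
	struct sync_timeline *timeline;
	struct sync_fence *out_fence;
	struct sync_fence *in_fence;

	/*
	 * One timeline per channel. In the driver this is created at channel
	 * setup and stored in the channel's nvgpu_os_fence_framework, which
	 * is where gk20a_sync_fence_create() looks it up.
	 */
	timeline = gk20a_sync_timeline_create("example_timeline");
	if (!timeline)
		return;

	/*
	 * Wrap the semaphore in a sync_fence; in the real driver this fence
	 * is installed as an fd and handed back to user space.
	 */
	out_fence = gk20a_sync_fence_create(ch, sema, "example_fence");

	/*
	 * Validate a fence fd passed in from user space; NULL means the fd
	 * is not backed by the "nvgpu_semaphore" timeline ops.
	 */
	in_fence = gk20a_sync_fence_fdget(user_fd);
	if (in_fence)
		sync_fence_put(in_fence);

	/*
	 * After the GPU releases the backing semaphore, poke the timeline so
	 * the sync framework re-evaluates has_signaled on pending points.
	 */
	gk20a_sync_timeline_signal(timeline);

	/* Torn down together with the channel. */
	gk20a_sync_timeline_destroy(timeline);
}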