Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 942
1 file changed, 942 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
new file mode 100644
index 00000000..d83684e4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -0,0 +1,942 @@
/*
 * GK20A Channel Synchronization Abstraction
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/semaphore.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/list.h>
#include <nvgpu/nvhost.h>

#include "channel_sync_gk20a.h"
#include "gk20a.h"
#include "fence_gk20a.h"
#include "sync_gk20a.h"
#include "mm_gk20a.h"

#ifdef CONFIG_SYNC
#include "../drivers/staging/android/sync.h"
#endif

#ifdef CONFIG_TEGRA_GK20A_NVHOST

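/*
 * Syncpoint-backed implementation of gk20a_channel_sync. Each channel that
 * uses this backend owns one host-managed syncpoint; waits and increments
 * are written as host methods into the channel's private command buffer.
 */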
struct gk20a_channel_syncpt {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;
	struct nvgpu_nvhost_dev *nvhost_dev;
	u32 id;
	struct nvgpu_mem syncpt_buf;
};

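/*
 * Emit a host wait on a raw syncpoint (id, thresh) pair. The wait is elided
 * when the id is invalid or the threshold has already expired.
 */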
static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
		u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	int err = 0;

	if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(sp->nvhost_dev, id)) {
		nvgpu_warn(c->g, "invalid wait id in gpfifo submit, elided");
		return 0;
	}

	if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev, id, thresh))
		return 0;

	err = gk20a_channel_alloc_priv_cmdbuf(c,
		c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		return err;
	}

	nvgpu_log(c->g, gpu_dbg_info, "wait id %d gpu va %llx",
			id, c->vm->syncpt_ro_map_gpu_va);
	c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id,
			thresh, c->vm->syncpt_ro_map_gpu_va);

	return 0;
}

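/*
 * Emit one host wait per sync_pt backing the Android sync fd. Points that
 * have already expired are zero-filled in the command buffer instead of
 * generating a wait.
 */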
static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry *wait_cmd,
		struct gk20a_fence *fence)
{
#ifdef CONFIG_SYNC
	int i;
	int num_wait_cmds;
	struct sync_fence *sync_fence;
	struct sync_pt *pt;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	u32 wait_id;
	int err = 0;
	u32 wait_cmd_size = 0;

	sync_fence = nvgpu_nvhost_sync_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	/* validate syncpt ids */
	for (i = 0; i < sync_fence->num_fences; i++) {
		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
		wait_id = nvgpu_nvhost_sync_pt_id(pt);
		if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
					sp->nvhost_dev, wait_id)) {
			sync_fence_put(sync_fence);
			return -EINVAL;
		}
	}

	num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
	if (num_wait_cmds == 0) {
		sync_fence_put(sync_fence);
		return 0;
	}

	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
	err = gk20a_channel_alloc_priv_cmdbuf(c,
			wait_cmd_size * num_wait_cmds, wait_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return err;
	}

	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
		u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
		u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);

		if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev,
				wait_id, wait_value)) {
			nvgpu_memset(c->g, wait_cmd->mem,
				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
				0, wait_cmd_size * sizeof(u32));
		} else {
			nvgpu_log(c->g, gpu_dbg_info, "wait_id %d gpu va %llx",
					wait_id, c->vm->syncpt_ro_map_gpu_va);
			c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
				i * wait_cmd_size, wait_id, wait_value,
				c->vm->syncpt_ro_map_gpu_va);
		}
	}

	WARN_ON(i != num_wait_cmds);
	sync_fence_put(sync_fence);

	return 0;
#else
	return -ENODEV;
#endif
}

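/* nvhost interrupt notifier: the syncpoint reached the submit's threshold. */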
static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
{
	struct channel_gk20a *ch = priv;

	gk20a_channel_update(ch);

	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
	gk20a_channel_put(ch);
}

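/*
 * Append a syncpoint increment to the channel's private command buffer and
 * build the post-fence for it. If register_irq is set, an nvhost notifier
 * calls gk20a_channel_update() once the new threshold is reached.
 */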
static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
		bool wfi_cmd,
		bool register_irq,
		struct priv_cmd_entry *incr_cmd,
		struct gk20a_fence *fence,
		bool need_sync_fence)
{
	u32 thresh;
	int err;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;

	err = gk20a_channel_alloc_priv_cmdbuf(c,
			c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd),
			incr_cmd);
	if (err)
		return err;

	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
			sp->id, sp->syncpt_buf.gpu_va);
	c->g->ops.fifo.add_syncpt_incr_cmd(c->g, wfi_cmd,
			incr_cmd, sp->id, sp->syncpt_buf.gpu_va);

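	/* max is advanced by 2 to match the increments in the command above */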
	thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost_dev, sp->id, 2);

	if (register_irq) {
		struct channel_gk20a *referenced = gk20a_channel_get(c);

		WARN_ON(!referenced);

		if (referenced) {
			/* note: channel_put() is in
			 * gk20a_channel_syncpt_update() */

			err = nvgpu_nvhost_intr_register_notifier(
					sp->nvhost_dev,
					sp->id, thresh,
					gk20a_channel_syncpt_update, c);
			if (err)
				gk20a_channel_put(referenced);

			/* Adding the interrupt action should never fail.
			 * Proper error handling here would require us to
			 * decrement the syncpt max back to its original
			 * value. */
			WARN(err, "failed to set submit complete interrupt");
		}
	}

	err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev, sp->id, thresh,
			wfi_cmd, need_sync_fence);
	if (err)
		goto clean_up_priv_cmd;

	return 0;

clean_up_priv_cmd:
	gk20a_free_priv_cmdbuf(c, incr_cmd);
	return err;
}

static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	return __gk20a_channel_syncpt_incr(s,
			true /* wfi */,
			false /* no irq handler */,
			entry, fence, true);
}

static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool need_sync_fence,
		bool register_irq)
{
	/* Don't add a wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_syncpt_incr(s,
			false /* no wfi */,
			register_irq /* register irq */,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool wfi,
		bool need_sync_fence,
		bool register_irq)
{
	/* Need to do 'wfi + host incr' since we return the fence
	 * to user space. */
	return __gk20a_channel_syncpt_incr(s,
			wfi,
			register_irq /* register irq */,
			entry, fence, need_sync_fence);
}

static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);

	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);
}

static void gk20a_channel_syncpt_signal_timeline(
		struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);

	return sp->id;
}

static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);

	sp->c->g->ops.fifo.free_syncpt_buf(sp->c, &sp->syncpt_buf);

	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);
	nvgpu_nvhost_syncpt_put_ref_ext(sp->nvhost_dev, sp->id);
	nvgpu_kfree(sp->c->g, sp);
}

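/* Allocate a per-channel, host-managed syncpoint and wire up the sync ops. */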
static struct gk20a_channel_sync *
gk20a_channel_syncpt_create(struct channel_gk20a *c)
{
	struct gk20a_channel_syncpt *sp;
	char syncpt_name[32];

	sp = nvgpu_kzalloc(c->g, sizeof(*sp));
	if (!sp)
		return NULL;

	sp->c = c;
	sp->nvhost_dev = c->g->nvhost_dev;

	snprintf(syncpt_name, sizeof(syncpt_name),
		"%s_%d", c->g->name, c->chid);

	sp->id = nvgpu_nvhost_get_syncpt_host_managed(sp->nvhost_dev,
						c->chid, syncpt_name);
	if (!sp->id) {
		nvgpu_kfree(c->g, sp);
		nvgpu_err(c->g, "failed to get free syncpt");
		return NULL;
	}

	sp->c->g->ops.fifo.alloc_syncpt_buf(sp->c, sp->id,
				&sp->syncpt_buf);

	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);

	nvgpu_atomic_set(&sp->ops.refcount, 0);
	sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
	sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
	sp->ops.incr = gk20a_channel_syncpt_incr;
	sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi;
	sp->ops.incr_user = gk20a_channel_syncpt_incr_user;
	sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
	sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline;
	sp->ops.syncpt_id = gk20a_channel_syncpt_id;
	sp->ops.destroy = gk20a_channel_syncpt_destroy;

	return &sp->ops;
}
#endif /* CONFIG_TEGRA_GK20A_NVHOST */

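/*
 * Semaphore-backed implementation of gk20a_channel_sync, used when the
 * platform has no host syncpoints. Completion is tracked with nvgpu
 * semaphores and, under CONFIG_SYNC, exposed to userspace on a sync timeline.
 */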
struct gk20a_channel_semaphore {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;

	/* A semaphore pool owned by this channel. */
	struct nvgpu_semaphore_pool *pool;

	/* A sync timeline that advances when the GPU completes work. */
	struct sync_timeline *timeline;
};

#ifdef CONFIG_SYNC
struct wait_fence_work {
	struct sync_fence_waiter waiter;
	struct sync_fence *fence;
	struct channel_gk20a *ch;
	struct nvgpu_semaphore *sema;
	struct gk20a *g;
	struct nvgpu_list_node entry;
};

static inline struct wait_fence_work *
wait_fence_work_from_entry(struct nvgpu_list_node *node)
{
	return (struct wait_fence_work *)
		((uintptr_t)node - offsetof(struct wait_fence_work, entry));
}

/*
 * Keep track of all the pending waits on semaphores that exist for a GPU. This
 * has to be done because the waits on fences backed by semaphores are
 * asynchronous, so it's impossible to otherwise know when they will fire.
 * During driver cleanup this list can be checked and all existing waits can be
 * canceled.
 */
static void gk20a_add_pending_sema_wait(struct gk20a *g,
		struct wait_fence_work *work)
{
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	nvgpu_list_add(&work->entry, &g->pending_sema_waits);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
}

/*
 * Move the entire pending wait list onto the passed list and reinitialize
 * the (now empty) pending list.
 */
static void gk20a_start_sema_wait_cancel(struct gk20a *g,
		struct nvgpu_list_node *list)
{
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	nvgpu_list_replace_init(&g->pending_sema_waits, list);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
}

/*
 * During shutdown this should be called to make sure that any pending sema
 * waits are canceled. This is a fairly delicate and tricky bit of code. Here's
 * how it works.
 *
 * Every time a semaphore wait is initiated in SW the wait_fence_work struct is
 * added to the pending_sema_waits list. When the semaphore launcher code runs
 * it checks the pending_sema_waits list. If this list is non-empty that means
 * that the wait_fence_work struct must be present and can be removed.
 *
 * When the driver shuts down, one of the steps is to cancel pending sema
 * waits. To do this the entire list of pending sema waits is removed (and
 * stored in a separate local list). So now, if the semaphore launcher code
 * runs it will see that the pending_sema_waits list is empty and knows that
 * it no longer owns the wait_fence_work struct.
 */
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g)
{
	struct wait_fence_work *work;
	struct nvgpu_list_node local_pending_sema_waits;

	gk20a_start_sema_wait_cancel(g, &local_pending_sema_waits);

	while (!nvgpu_list_empty(&local_pending_sema_waits)) {
		int ret;

		work = nvgpu_list_first_entry(&local_pending_sema_waits,
					wait_fence_work,
					entry);

		nvgpu_list_del(&work->entry);

		/*
		 * Only nvgpu_kfree() work if the cancel is successful.
		 * Otherwise it's in use by the
		 * gk20a_channel_semaphore_launcher() code.
		 */
		ret = sync_fence_cancel_async(work->fence, &work->waiter);
		if (ret == 0)
			nvgpu_kfree(g, work);
	}
}

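/*
 * sync_fence waiter callback, invoked once the external fence signals: drop
 * the pending-wait bookkeeping entry and release the GPU semaphore the
 * channel is blocked on.
 */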
static void gk20a_channel_semaphore_launcher(
		struct sync_fence *fence,
		struct sync_fence_waiter *waiter)
{
	int err;
	struct wait_fence_work *w =
		container_of(waiter, struct wait_fence_work, waiter);
	struct gk20a *g = w->g;

	/*
	 * This spinlock must protect a _very_ small critical section -
	 * otherwise it's possible that the deterministic submit path suffers.
	 */
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	if (!nvgpu_list_empty(&g->pending_sema_waits))
		nvgpu_list_del(&w->entry);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);

	gk20a_dbg_info("waiting for pre fence %p '%s'",
			fence, fence->name);
	err = sync_fence_wait(fence, -1);
	if (err < 0)
		nvgpu_err(g, "error waiting pre-fence: %d", err);

	gk20a_dbg_info(
		"wait completed (%d) for fence %p '%s', triggering gpu work",
		err, fence, fence->name);
	sync_fence_put(fence);
	nvgpu_semaphore_release(w->sema);
	nvgpu_semaphore_put(w->sema);
	nvgpu_kfree(g, w);
}
#endif

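/*
 * Write a semaphore acquire (acquire == true) or release method sequence for
 * semaphore s into the channel's private command buffer at cmd->off.
 */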
static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
		struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
		int cmd_size, bool acquire, bool wfi)
{
	int ch = c->chid;
	u32 ob, off = cmd->off;
	u64 va;

	ob = off;

	/*
	 * RO for acquire (since we just need to read the mem) and RW for
	 * release since we will need to write back to the semaphore memory.
	 */
	va = acquire ? nvgpu_semaphore_gpu_ro_va(s) :
		       nvgpu_semaphore_gpu_rw_va(s);

	/*
	 * If the op is not an acquire (and therefore a release) we should
	 * incr the underlying sema next_value.
	 */
	if (!acquire)
		nvgpu_semaphore_incr(s);

	/* semaphore_a */
	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
	/* offset_upper */
	nvgpu_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff);
	/* semaphore_b */
	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
	/* offset */
	nvgpu_mem_wr32(g, cmd->mem, off++, va & 0xffffffff);

	if (acquire) {
		/* semaphore_c */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       nvgpu_semaphore_get_value(s));
		/* semaphore_d */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: acq_geq, switch_en */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
	} else {
		/* semaphore_c */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       nvgpu_semaphore_get_value(s));
		/* semaphore_d */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: release, wfi */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       0x2 | ((wfi ? 0x0 : 0x1) << 20));
		/* non_stall_int */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
		/* ignored */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0);
	}

	if (acquire)
		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d "
				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
				     ch, nvgpu_semaphore_get_value(s),
				     s->hw_sema->ch->chid, va, cmd->gva,
				     cmd->mem->gpu_va, ob);
	else
		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx "
				     "cmd_mem=0x%llx b=0x%llx off=%u",
				     ch, nvgpu_semaphore_get_value(s),
				     nvgpu_semaphore_read(s), va, cmd->gva,
				     cmd->mem->gpu_va, ob);
}

static int gk20a_channel_semaphore_wait_syncpt(
		struct gk20a_channel_sync *s, u32 id,
		u32 thresh, struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct gk20a *g = sema->c->g;

	nvgpu_err(g, "trying to use syncpoint synchronization");
	return -ENODEV;
}

#ifdef CONFIG_SYNC
/*
 * Attempt a fast path for waiting on a sync_fence. Basically if the passed
 * sync_fence is backed by a nvgpu_semaphore then there's no reason to go
 * through the rigmarole of setting up a separate semaphore which waits on an
 * interrupt from the GPU and then triggers a worker thread to execute a SW
 * based semaphore release. Instead just have the GPU wait on the same
 * semaphore that is going to be incremented by the GPU.
 *
 * Returns -ENODEV when the fast path cannot be taken because the fence is not
 * backed by a GPU semaphore, 0 on success, or an error from command buffer
 * allocation.
 */
static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
		struct sync_fence *fence,
		struct priv_cmd_entry *wait_cmd,
		struct nvgpu_semaphore **fp_sema)
{
	struct nvgpu_semaphore *sema;
	int err;

	if (!gk20a_is_sema_backed_sync_fence(fence))
		return -ENODEV;

	sema = gk20a_sync_fence_get_sema(fence);

	/*
	 * If there's no underlying sema then the fence has already signaled.
	 */
	if (!sema) {
		*fp_sema = NULL;
		return 0;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
	if (err)
		return err;

	nvgpu_semaphore_get(sema);
	BUG_ON(!nvgpu_atomic_read(&sema->value));
	add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);

	/*
	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
	 * fence with the underlying semaphore.
	 */
	*fp_sema = sema;

	return 0;
}
#endif

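/*
 * Wait on an external sync fd. The semaphore fast path is tried first; when
 * it does not apply, a fresh semaphore is allocated, the GPU acquires it,
 * and the waiter callback releases it once the external fence signals.
 */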
static int gk20a_channel_semaphore_wait_fd(
		struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sema->c;
#ifdef CONFIG_SYNC
	struct nvgpu_semaphore *fp_sema;
	struct sync_fence *sync_fence;
	struct priv_cmd_entry *wait_cmd = entry;
	struct wait_fence_work *w = NULL;
	int err, ret, status;

	sync_fence = gk20a_sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
	if (ret == 0) {
		if (fp_sema) {
			err = gk20a_fence_from_semaphore(c->g, fence,
					sema->timeline,
					fp_sema,
					&c->semaphore_wq,
					false, false);
			if (err) {
				nvgpu_semaphore_put(fp_sema);
				goto clean_up_priv_cmd;
			}
		} else {
			/*
			 * Init an empty fence. It will instantly return
			 * from gk20a_fence_wait().
			 */
			gk20a_init_fence(fence, NULL, NULL, false);
		}

		sync_fence_put(sync_fence);
		goto skip_slow_path;
	}

	/* If the fence has signaled there is no reason to wait on it. */
	status = atomic_read(&sync_fence->status);
	if (status == 0) {
		sync_fence_put(sync_fence);
		goto skip_slow_path;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		goto clean_up_sync_fence;
	}

	w = nvgpu_kzalloc(c->g, sizeof(*w));
	if (!w) {
		err = -ENOMEM;
		goto clean_up_priv_cmd;
	}

	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
	w->fence = sync_fence;
	w->g = c->g;
	w->ch = c;
	w->sema = nvgpu_semaphore_alloc(c);
	if (!w->sema) {
		nvgpu_err(c->g, "ran out of semaphores");
		err = -ENOMEM;
		goto clean_up_worker;
	}

	/* worker takes one reference */
	nvgpu_semaphore_get(w->sema);
	nvgpu_semaphore_incr(w->sema);

	/* GPU unblocked when the semaphore value increments. */
	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);

	/*
	 * We need to create the fence before adding the waiter to ensure
	 * that we properly clean up in the event the sync_fence has
	 * already signaled.
	 */
	err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
			&c->semaphore_wq, false, false);
	if (err)
		goto clean_up_sema;

	ret = sync_fence_wait_async(sync_fence, &w->waiter);
	gk20a_add_pending_sema_wait(c->g, w);

	/*
	 * If the sync_fence has already signaled then the above async_wait
	 * will never trigger. This causes the semaphore release op to never
	 * happen which, in turn, hangs the GPU. That's bad. So let's just
	 * do the nvgpu_semaphore_release() right now.
	 */
	if (ret == 1) {
		sync_fence_put(sync_fence);
		nvgpu_semaphore_release(w->sema);
		nvgpu_semaphore_put(w->sema);
	}

skip_slow_path:
	return 0;

clean_up_sema:
	/*
	 * Release the refs to the semaphore, including
	 * the one for the worker since it will never run.
	 */
	nvgpu_semaphore_put(w->sema);
	nvgpu_semaphore_put(w->sema);
clean_up_worker:
	nvgpu_kfree(c->g, w);
clean_up_priv_cmd:
	gk20a_free_priv_cmdbuf(c, entry);
clean_up_sync_fence:
	sync_fence_put(sync_fence);
	return err;
#else
	nvgpu_err(c->g,
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

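/*
 * Allocate a fresh semaphore, emit the release that the GPU executes when
 * the job completes, and build the post-fence around it.
 */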
static int __gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s, bool wfi_cmd,
		struct priv_cmd_entry *incr_cmd,
		struct gk20a_fence *fence,
		bool need_sync_fence)
{
	int incr_cmd_size;
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sp->c;
	struct nvgpu_semaphore *semaphore;
	int err = 0;

	semaphore = nvgpu_semaphore_alloc(c);
	if (!semaphore) {
		nvgpu_err(c->g, "ran out of semaphores");
		return -ENOMEM;
	}

	incr_cmd_size = 10;
	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		goto clean_up_sema;
	}

	/* Release the completion semaphore. */
	add_sema_cmd(c->g, c, semaphore, incr_cmd, incr_cmd_size, false,
		     wfi_cmd);

	err = gk20a_fence_from_semaphore(c->g, fence,
			sp->timeline, semaphore,
			&c->semaphore_wq,
			wfi_cmd,
			need_sync_fence);
	if (err)
		goto clean_up_sema;

	return 0;

clean_up_sema:
	nvgpu_semaphore_put(semaphore);
	return err;
}

static int gk20a_channel_semaphore_incr_wfi(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	return __gk20a_channel_semaphore_incr(s,
			true /* wfi */,
			entry, fence, true);
}

static int gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool need_sync_fence,
		bool register_irq)
{
	/* Don't add a wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_semaphore_incr(s,
			false /* no wfi */,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_semaphore_incr_user(
		struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool wfi,
		bool need_sync_fence,
		bool register_irq)
{
#ifdef CONFIG_SYNC
	return __gk20a_channel_semaphore_incr(s, wfi, entry, fence,
					      need_sync_fence);
#else
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);

	nvgpu_err(sema->c->g,
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static void gk20a_channel_semaphore_signal_timeline(
		struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);

	gk20a_sync_timeline_signal(sp->timeline);
}

static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s)
{
	return -EINVAL;
}

static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);

	if (sema->timeline)
		gk20a_sync_timeline_destroy(sema->timeline);

	/* The sema pool is cleaned up by the VM destroy. */
	sema->pool = NULL;

	nvgpu_kfree(sema->c->g, sema);
}

static struct gk20a_channel_sync *
gk20a_channel_semaphore_create(struct channel_gk20a *c)
{
	int asid = -1;
	struct gk20a_channel_semaphore *sema;

	if (WARN_ON(!c->vm))
		return NULL;

	sema = nvgpu_kzalloc(c->g, sizeof(*sema));
	if (!sema)
		return NULL;
	sema->c = c;

	if (c->vm->as_share)
		asid = c->vm->as_share->id;

	sema->pool = c->vm->sema_pool;

#ifdef CONFIG_SYNC
	sema->timeline = gk20a_sync_timeline_create(
			"gk20a_ch%d_as%d", c->chid, asid);
	if (!sema->timeline) {
		gk20a_channel_semaphore_destroy(&sema->ops);
		return NULL;
	}
#endif
	nvgpu_atomic_set(&sema->ops.refcount, 0);
	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
	sema->ops.incr = gk20a_channel_semaphore_incr;
	sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
	sema->ops.incr_user = gk20a_channel_semaphore_incr_user;
	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
	sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
	sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id;
	sema->ops.destroy = gk20a_channel_semaphore_destroy;

	return &sema->ops;
}

void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
{
	sync->destroy(sync);
}

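/*
 * Pick the sync backend for a channel: syncpoints when the platform provides
 * them, GPU semaphores otherwise.
 */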
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	if (gk20a_platform_has_syncpoints(c->g))
		return gk20a_channel_syncpt_create(c);
#endif
	return gk20a_channel_semaphore_create(c);
}

bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
{
	return !gk20a_platform_has_syncpoints(g);
}