Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c  2425
1 file changed, 2425 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
new file mode 100644
index 00000000..4be232f1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -0,0 +1,2425 @@
1/*
2 * GK20A Graphics channel
3 *
4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <trace/events/gk20a.h>
26#include <uapi/linux/nvgpu.h>
27
28#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_GK20A_CYCLE_STATS)
29#include <linux/dma-buf.h>
30#endif
31
32#include <nvgpu/semaphore.h>
33#include <nvgpu/timers.h>
34#include <nvgpu/kmem.h>
35#include <nvgpu/dma.h>
36#include <nvgpu/log.h>
37#include <nvgpu/atomic.h>
38#include <nvgpu/bug.h>
39#include <nvgpu/list.h>
40#include <nvgpu/circ_buf.h>
41#include <nvgpu/cond.h>
42#include <nvgpu/enabled.h>
43#include <nvgpu/debug.h>
44#include <nvgpu/ltc.h>
45#include <nvgpu/barrier.h>
46#include <nvgpu/ctxsw_trace.h>
47
48#include "gk20a.h"
49#include "dbg_gpu_gk20a.h"
50#include "fence_gk20a.h"
51
52static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
53static void gk20a_channel_dump_ref_actions(struct channel_gk20a *c);
54
55static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
56static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
57
58static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c);
59
60static void channel_gk20a_joblist_add(struct channel_gk20a *c,
61 struct channel_gk20a_job *job);
62static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
63 struct channel_gk20a_job *job);
64static struct channel_gk20a_job *channel_gk20a_joblist_peek(
65 struct channel_gk20a *c);
66
67static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
68
69/* allocate GPU channel */
70static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
71{
72 struct channel_gk20a *ch = NULL;
73 struct gk20a *g = f->g;
74
75 nvgpu_mutex_acquire(&f->free_chs_mutex);
76 if (!nvgpu_list_empty(&f->free_chs)) {
77 ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a,
78 free_chs);
79 nvgpu_list_del(&ch->free_chs);
80 WARN_ON(nvgpu_atomic_read(&ch->ref_count));
81 WARN_ON(ch->referenceable);
82 f->used_channels++;
83 }
84 nvgpu_mutex_release(&f->free_chs_mutex);
85
86 if (g->aggressive_sync_destroy_thresh &&
87 (f->used_channels >
88 g->aggressive_sync_destroy_thresh))
89 g->aggressive_sync_destroy = true;
90
91 return ch;
92}
93
94static void free_channel(struct fifo_gk20a *f,
95 struct channel_gk20a *ch)
96{
97 struct gk20a *g = f->g;
98
99 trace_gk20a_release_used_channel(ch->chid);
100 /* refcount is zero here and channel is in a freed/dead state */
101 nvgpu_mutex_acquire(&f->free_chs_mutex);
102 /* add to head to increase visibility of timing-related bugs */
103 nvgpu_list_add(&ch->free_chs, &f->free_chs);
104 f->used_channels--;
105 nvgpu_mutex_release(&f->free_chs_mutex);
106
107 /*
108 * On teardown it is not possible to dereference platform, but ignoring
109 * this is fine then because no new channels would be created.
110 */
111 if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
112 if (g->aggressive_sync_destroy_thresh &&
113 (f->used_channels <
114 g->aggressive_sync_destroy_thresh))
115 g->aggressive_sync_destroy = false;
116 }
117}
118
119int channel_gk20a_commit_va(struct channel_gk20a *c)
120{
121 struct gk20a *g = c->g;
122
123 gk20a_dbg_fn("");
124
125 g->ops.mm.init_inst_block(&c->inst_block, c->vm,
126 c->vm->gmmu_page_sizes[gmmu_page_size_big]);
127
128 return 0;
129}
130
131u32 gk20a_channel_get_timeslice(struct channel_gk20a *ch)
132{
133 struct gk20a *g = ch->g;
134
135 if (!ch->timeslice_us)
136 return g->ops.fifo.default_timeslice_us(g);
137
138 return ch->timeslice_us;
139}
140
141int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
142 int timeslice_period,
143 int *__timeslice_timeout, int *__timeslice_scale)
144{
145 int value = scale_ptimer(timeslice_period,
146 ptimer_scalingfactor10x(g->ptimer_src_freq));
147 int shift = 0;
148
149 /* value field is 8 bits long */
150 while (value >= 1 << 8) {
151 value >>= 1;
152 shift++;
153 }
154
155	/* time slice register is only 18 bits long */
156 if ((value << shift) >= 1<<19) {
157 pr_err("Requested timeslice value is clamped to 18 bits\n");
158 value = 255;
159 shift = 10;
160 }
161
162 *__timeslice_timeout = value;
163 *__timeslice_scale = shift;
164
165 return 0;
166}
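/*
 * Illustrative worked example for the loop above (numbers are hypothetical,
 * not from the original source): if scale_ptimer() yields value = 3000, the
 * loop halves it until it fits in 8 bits: 3000 -> 1500 -> 750 -> 375 -> 187
 * with shift = 4. The caller then programs timeout = 187 and scale = 4,
 * which encodes roughly 187 << 4 = 2992 ptimer units.
 */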
167
168int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
169{
170 return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->chid, add, true);
171}
172
173int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
174{
175 struct tsg_gk20a *tsg;
176
177 if (gk20a_is_channel_marked_as_tsg(ch)) {
178 tsg = &g->fifo.tsg[ch->tsgid];
179 g->ops.fifo.enable_tsg(tsg);
180 } else {
181 g->ops.fifo.enable_channel(ch);
182 }
183
184 return 0;
185}
186
187int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
188{
189 struct tsg_gk20a *tsg;
190
191 if (gk20a_is_channel_marked_as_tsg(ch)) {
192 tsg = &g->fifo.tsg[ch->tsgid];
193 g->ops.fifo.disable_tsg(tsg);
194 } else {
195 g->ops.fifo.disable_channel(ch);
196 }
197
198 return 0;
199}
200
201void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
202{
203 struct channel_gk20a_job *job, *n;
204 bool released_job_semaphore = false;
205 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
206
207 /* synchronize with actual job cleanup */
208 nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
209
210 /* ensure no fences are pending */
211 nvgpu_mutex_acquire(&ch->sync_lock);
212 if (ch->sync)
213 ch->sync->set_min_eq_max(ch->sync);
214 nvgpu_mutex_release(&ch->sync_lock);
215
216 /* release all job semaphores (applies only to jobs that use
217 semaphore synchronization) */
218 channel_gk20a_joblist_lock(ch);
219 if (pre_alloc_enabled) {
220 int tmp_get = ch->joblist.pre_alloc.get;
221 int put = ch->joblist.pre_alloc.put;
222
223 /*
224 * ensure put is read before any subsequent reads.
225 * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job()
226 */
227 nvgpu_smp_rmb();
228
229 while (tmp_get != put) {
230 job = &ch->joblist.pre_alloc.jobs[tmp_get];
231 if (job->post_fence->semaphore) {
232 __nvgpu_semaphore_release(
233 job->post_fence->semaphore, true);
234 released_job_semaphore = true;
235 }
236 tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
237 }
238 } else {
239 nvgpu_list_for_each_entry_safe(job, n,
240 &ch->joblist.dynamic.jobs,
241 channel_gk20a_job, list) {
242 if (job->post_fence->semaphore) {
243 __nvgpu_semaphore_release(
244 job->post_fence->semaphore, true);
245 released_job_semaphore = true;
246 }
247 }
248 }
249 channel_gk20a_joblist_unlock(ch);
250
251 nvgpu_mutex_release(&ch->joblist.cleanup_lock);
252
253 if (released_job_semaphore)
254 nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
255
256 /*
257 * When closing the channel, this scheduled update holds one ref which
258 * is waited for before advancing with freeing.
259 */
260 gk20a_channel_update(ch);
261}
262
263void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
264{
265 gk20a_dbg_fn("");
266
267 if (gk20a_is_channel_marked_as_tsg(ch))
268 return gk20a_fifo_abort_tsg(ch->g, ch->tsgid, channel_preempt);
269
270 /* make sure new kickoffs are prevented */
271 ch->has_timedout = true;
272
273 ch->g->ops.fifo.disable_channel(ch);
274
275 if (channel_preempt && ch->ch_ctx.gr_ctx)
276 ch->g->ops.fifo.preempt_channel(ch->g, ch->chid);
277
278 gk20a_channel_abort_clean_up(ch);
279}
280
281int gk20a_wait_channel_idle(struct channel_gk20a *ch)
282{
283 bool channel_idle = false;
284 struct nvgpu_timeout timeout;
285
286 nvgpu_timeout_init(ch->g, &timeout, gk20a_get_gr_idle_timeout(ch->g),
287 NVGPU_TIMER_CPU_TIMER);
288
289 do {
290 channel_gk20a_joblist_lock(ch);
291 channel_idle = channel_gk20a_joblist_is_empty(ch);
292 channel_gk20a_joblist_unlock(ch);
293 if (channel_idle)
294 break;
295
296 nvgpu_usleep_range(1000, 3000);
297 } while (!nvgpu_timeout_expired(&timeout));
298
299 if (!channel_idle) {
300 nvgpu_err(ch->g, "jobs not freed for channel %d",
301 ch->chid);
302 return -EBUSY;
303 }
304
305 return 0;
306}
307
308void gk20a_disable_channel(struct channel_gk20a *ch)
309{
310 gk20a_channel_abort(ch, true);
311 channel_gk20a_update_runlist(ch, false);
312}
313
314int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
315 u32 level)
316{
317 struct gk20a *g = ch->g;
318 int ret;
319
320 if (gk20a_is_channel_marked_as_tsg(ch)) {
321 nvgpu_err(g, "invalid operation for TSG!");
322 return -EINVAL;
323 }
324
325 switch (level) {
326 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
327 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
328 case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
329 ret = g->ops.fifo.set_runlist_interleave(g, ch->chid,
330 false, 0, level);
331 break;
332 default:
333 ret = -EINVAL;
334 break;
335 }
336
337 gk20a_dbg(gpu_dbg_sched, "chid=%u interleave=%u", ch->chid, level);
338
339 return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true);
340}
341
342/**
343 * gk20a_set_error_notifier_locked()
344 * Should be called with ch->error_notifier_mutex held
345 */
346void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error)
347{
348 if (ch->error_notifier_ref) {
349 struct timespec time_data;
350 u64 nsec;
351 getnstimeofday(&time_data);
352 nsec = ((u64)time_data.tv_sec) * 1000000000u +
353 (u64)time_data.tv_nsec;
354 ch->error_notifier->time_stamp.nanoseconds[0] =
355 (u32)nsec;
356 ch->error_notifier->time_stamp.nanoseconds[1] =
357 (u32)(nsec >> 32);
358 ch->error_notifier->info32 = error;
359 ch->error_notifier->status = 0xffff;
360
361 nvgpu_err(ch->g,
362 "error notifier set to %d for ch %d", error, ch->chid);
363 }
364}
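/*
 * Illustrative note (sketch, not from the original source): the 64-bit
 * nanosecond timestamp above is split low word first. For example, with
 * nsec = 0x0000001234567890ULL the notifier would get
 * time_stamp.nanoseconds[0] = 0x34567890 and
 * time_stamp.nanoseconds[1] = 0x00000012.
 */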
365
366void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
367{
368 nvgpu_mutex_acquire(&ch->error_notifier_mutex);
369 gk20a_set_error_notifier_locked(ch, error);
370 nvgpu_mutex_release(&ch->error_notifier_mutex);
371}
372
373static void gk20a_wait_until_counter_is_N(
374 struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
375 struct nvgpu_cond *c, const char *caller, const char *counter_name)
376{
377 while (true) {
378 if (NVGPU_COND_WAIT(
379 c,
380 nvgpu_atomic_read(counter) == wait_value,
381 5000) == 0)
382 break;
383
384 nvgpu_warn(ch->g,
385 "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
386 caller, ch->chid, counter_name,
387 nvgpu_atomic_read(counter), wait_value);
388
389 gk20a_channel_dump_ref_actions(ch);
390 }
391}
392
393#if defined(CONFIG_GK20A_CYCLE_STATS)
394void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
395{
396 /* disable existing cyclestats buffer */
397 nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
398 if (ch->cyclestate.cyclestate_buffer_handler) {
399 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
400 ch->cyclestate.cyclestate_buffer);
401 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
402 ch->cyclestate.cyclestate_buffer_handler = NULL;
403 ch->cyclestate.cyclestate_buffer = NULL;
404 ch->cyclestate.cyclestate_buffer_size = 0;
405 }
406 nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
407}
408
409int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
410{
411 int ret;
412
413 nvgpu_mutex_acquire(&ch->cs_client_mutex);
414 if (ch->cs_client) {
415 ret = gr_gk20a_css_detach(ch, ch->cs_client);
416 ch->cs_client = NULL;
417 } else {
418 ret = 0;
419 }
420 nvgpu_mutex_release(&ch->cs_client_mutex);
421
422 return ret;
423}
424
425#endif
426
427/* call ONLY when no references to the channel exist: after the last put */
428static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
429{
430 struct gk20a *g = ch->g;
431 struct fifo_gk20a *f = &g->fifo;
432 struct gr_gk20a *gr = &g->gr;
433 struct vm_gk20a *ch_vm = ch->vm;
434 unsigned long timeout = gk20a_get_gr_idle_timeout(g);
435 struct dbg_session_gk20a *dbg_s;
436 struct dbg_session_data *session_data, *tmp_s;
437 struct dbg_session_channel_data *ch_data, *tmp;
438 bool was_tsg = false;
439 int err;
440
441 gk20a_dbg_fn("");
442
443 WARN_ON(ch->g == NULL);
444
445 trace_gk20a_free_channel(ch->chid);
446
447 /*
448 * Disable channel/TSG and unbind here. This should not be executed if
449 * HW access is not available during shutdown/removal path as it will
450 * trigger a timeout
451 */
452 if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
453 /* abort channel and remove from runlist */
454 if (gk20a_is_channel_marked_as_tsg(ch)) {
455 err = g->ops.fifo.tsg_unbind_channel(ch);
456 if (err)
457 nvgpu_err(g,
458 "failed to unbind channel %d from TSG",
459 ch->chid);
460 /*
461			 * The channel is not part of the TSG from this point
462			 * onwards, so stash its status and use it whenever
463			 * necessary, e.g. while releasing gr_ctx in
464 * g->ops.gr.free_channel_ctx()
465 */
466 was_tsg = true;
467 } else {
468 gk20a_disable_channel(ch);
469 }
470 }
471 /* wait until there's only our ref to the channel */
472 if (!force)
473 gk20a_wait_until_counter_is_N(
474 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
475 __func__, "references");
476
477 /* wait until all pending interrupts for recently completed
478 * jobs are handled */
479 nvgpu_wait_for_deferred_interrupts(g);
480
481 /* prevent new refs */
482 nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
483 if (!ch->referenceable) {
484 nvgpu_spinlock_release(&ch->ref_obtain_lock);
485 nvgpu_err(ch->g,
486 "Extra %s() called to channel %u",
487 __func__, ch->chid);
488 return;
489 }
490 ch->referenceable = false;
491 nvgpu_spinlock_release(&ch->ref_obtain_lock);
492
493 /* matches with the initial reference in gk20a_open_new_channel() */
494 nvgpu_atomic_dec(&ch->ref_count);
495
496 /* wait until no more refs to the channel */
497 if (!force)
498 gk20a_wait_until_counter_is_N(
499 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
500 __func__, "references");
501
502 /* if engine reset was deferred, perform it now */
503 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
504 if (g->fifo.deferred_reset_pending) {
505 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
506 " deferred, running now");
507 /* if lock is already taken, a reset is taking place
508 so no need to repeat */
509 if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) {
510 gk20a_fifo_deferred_reset(g, ch);
511 nvgpu_mutex_release(&g->fifo.gr_reset_mutex);
512 }
513 }
514 nvgpu_mutex_release(&f->deferred_reset_mutex);
515
516 if (!gk20a_channel_as_bound(ch))
517 goto unbind;
518
519 gk20a_dbg_info("freeing bound channel context, timeout=%ld",
520 timeout);
521
522#ifdef CONFIG_GK20A_CTXSW_TRACE
523 if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
524 g->ops.fecs_trace.unbind_channel(g, ch);
525#endif
526
527 /* release channel ctx */
528 g->ops.gr.free_channel_ctx(ch, was_tsg);
529
530 gk20a_gr_flush_channel_tlb(gr);
531
532 nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem);
533 nvgpu_big_free(g, ch->gpfifo.pipe);
534 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
535
536#if defined(CONFIG_GK20A_CYCLE_STATS)
537 gk20a_channel_free_cycle_stats_buffer(ch);
538 gk20a_channel_free_cycle_stats_snapshot(ch);
539#endif
540
541 channel_gk20a_free_priv_cmdbuf(ch);
542
543 /* sync must be destroyed before releasing channel vm */
544 nvgpu_mutex_acquire(&ch->sync_lock);
545 if (ch->sync) {
546 gk20a_channel_sync_destroy(ch->sync);
547 ch->sync = NULL;
548 }
549 nvgpu_mutex_release(&ch->sync_lock);
550
551 /*
552 * free the channel used semaphore index.
553 * we need to do this before releasing the address space,
554 * as the semaphore pool might get freed after that point.
555 */
556 if (ch->hw_sema)
557 nvgpu_semaphore_free_hw_sema(ch);
558
559 /*
560 * When releasing the channel we unbind the VM - so release the ref.
561 */
562 nvgpu_vm_put(ch_vm);
563
564 nvgpu_spinlock_acquire(&ch->update_fn_lock);
565 ch->update_fn = NULL;
566 ch->update_fn_data = NULL;
567 nvgpu_spinlock_release(&ch->update_fn_lock);
568 cancel_work_sync(&ch->update_fn_work);
569
570 /* make sure we don't have deferred interrupts pending that
571 * could still touch the channel */
572 nvgpu_wait_for_deferred_interrupts(g);
573
574unbind:
575 g->ops.fifo.unbind_channel(ch);
576 g->ops.fifo.free_inst(g, ch);
577
578 /* put back the channel-wide submit ref from init */
579 if (ch->deterministic) {
580 nvgpu_rwsem_down_read(&g->deterministic_busy);
581 ch->deterministic = false;
582 if (!ch->deterministic_railgate_allowed)
583 gk20a_idle(g);
584 ch->deterministic_railgate_allowed = false;
585
586 nvgpu_rwsem_up_read(&g->deterministic_busy);
587 }
588
589 ch->vpr = false;
590 ch->vm = NULL;
591
592 WARN_ON(ch->sync);
593
594 /* unlink all debug sessions */
595 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
596
597 list_for_each_entry_safe(session_data, tmp_s,
598 &ch->dbg_s_list, dbg_s_entry) {
599 dbg_s = session_data->dbg_s;
600 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
601 list_for_each_entry_safe(ch_data, tmp,
602 &dbg_s->ch_list, ch_entry) {
603 if (ch_data->chid == ch->chid)
604 ch_data->unbind_single_channel(dbg_s, ch_data);
605 }
606 nvgpu_mutex_release(&dbg_s->ch_list_lock);
607 }
608
609 nvgpu_mutex_release(&g->dbg_sessions_lock);
610
611 /* free pre-allocated resources, if applicable */
612 if (channel_gk20a_is_prealloc_enabled(ch))
613 channel_gk20a_free_prealloc_resources(ch);
614
615#if GK20A_CHANNEL_REFCOUNT_TRACKING
616 memset(ch->ref_actions, 0, sizeof(ch->ref_actions));
617 ch->ref_actions_put = 0;
618#endif
619
620 /* make sure we catch accesses of unopened channels in case
621 * there's non-refcounted channel pointers hanging around */
622 ch->g = NULL;
623 nvgpu_smp_wmb();
624
625 /* ALWAYS last */
626 free_channel(f, ch);
627}
628
629static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch)
630{
631#if GK20A_CHANNEL_REFCOUNT_TRACKING
632 size_t i, get;
633 s64 now = nvgpu_current_time_ms();
634 s64 prev = 0;
635 struct device *dev = dev_from_gk20a(ch->g);
636
637 nvgpu_spinlock_acquire(&ch->ref_actions_lock);
638
639 dev_info(dev, "ch %d: refs %d. Actions, most recent last:\n",
640 ch->chid, nvgpu_atomic_read(&ch->ref_count));
641
642 /* start at the oldest possible entry. put is next insertion point */
643 get = ch->ref_actions_put;
644
645 /*
646 * If the buffer is not full, this will first loop to the oldest entry,
647 * skipping not-yet-initialized entries. There is no ref_actions_get.
648 */
649 for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) {
650 struct channel_gk20a_ref_action *act = &ch->ref_actions[get];
651
652 if (act->trace.nr_entries) {
653 dev_info(dev, "%s ref %zu steps ago (age %d ms, diff %d ms)\n",
654 act->type == channel_gk20a_ref_action_get
655 ? "GET" : "PUT",
656 GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i,
657 now - act->timestamp_ms,
658 act->timestamp_ms - prev);
659
660 print_stack_trace(&act->trace, 0);
661 prev = act->timestamp_ms;
662 }
663
664 get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING;
665 }
666
667 nvgpu_spinlock_release(&ch->ref_actions_lock);
668#endif
669}
670
671static void gk20a_channel_save_ref_source(struct channel_gk20a *ch,
672 enum channel_gk20a_ref_action_type type)
673{
674#if GK20A_CHANNEL_REFCOUNT_TRACKING
675 struct channel_gk20a_ref_action *act;
676
677 nvgpu_spinlock_acquire(&ch->ref_actions_lock);
678
679 act = &ch->ref_actions[ch->ref_actions_put];
680 act->type = type;
681 act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN;
682 act->trace.nr_entries = 0;
683 act->trace.skip = 3; /* onwards from the caller of this */
684 act->trace.entries = act->trace_entries;
685 save_stack_trace(&act->trace);
686 act->timestamp_ms = nvgpu_current_time_ms();
687 ch->ref_actions_put = (ch->ref_actions_put + 1) %
688 GK20A_CHANNEL_REFCOUNT_TRACKING;
689
690 nvgpu_spinlock_release(&ch->ref_actions_lock);
691#endif
692}
693
694/* Try to get a reference to the channel. Return non-NULL on success. If this
695 * fails, the channel is dead or being freed elsewhere and you must not touch it.
696 *
697 * Whenever a channel_gk20a pointer is about to be used, a reference must be
698 * held to it - either by you or by the caller, which should be documented or
699 * otherwise clearly visible. This usually boils down to the reference held by
700 * the file in direct ioctl paths, or an explicit get in exception handlers
701 * when the channel is found by a chid.
702 *
703 * Most global functions in this file require a reference to be held by the
704 * caller.
705 */
706struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
707 const char *caller) {
708 struct channel_gk20a *ret;
709
710 nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
711
712 if (likely(ch->referenceable)) {
713 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get);
714 nvgpu_atomic_inc(&ch->ref_count);
715 ret = ch;
716 } else
717 ret = NULL;
718
719 nvgpu_spinlock_release(&ch->ref_obtain_lock);
720
721 if (ret)
722 trace_gk20a_channel_get(ch->chid, caller);
723
724 return ret;
725}
726
727void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
728{
729 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put);
730 trace_gk20a_channel_put(ch->chid, caller);
731 nvgpu_atomic_dec(&ch->ref_count);
732 nvgpu_cond_broadcast(&ch->ref_count_dec_wq);
733
734 /* More puts than gets. Channel is probably going to get
735 * stuck. */
736 WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0);
737
738 /* Also, more puts than gets. ref_count can go to 0 only if
739 * the channel is closing. Channel is probably going to get
740 * stuck. */
741 WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable);
742}
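/*
 * Typical usage sketch (illustrative; assumes the usual gk20a_channel_get()/
 * gk20a_channel_put() wrappers around the functions above):
 *
 *	struct channel_gk20a *ch = gk20a_channel_get(&f->channel[chid]);
 *
 *	if (!ch)
 *		return;		// dead or being freed; must not be touched
 *	// ... safe to use ch here ...
 *	gk20a_channel_put(ch);
 */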
743
744void gk20a_channel_close(struct channel_gk20a *ch)
745{
746 gk20a_free_channel(ch, false);
747}
748
749/*
750 * Be careful with this - it is meant for terminating channels when we know the
751 * driver is otherwise dying. Ref counts and the like are ignored by this
752 * version of the cleanup.
753 */
754void __gk20a_channel_kill(struct channel_gk20a *ch)
755{
756 gk20a_free_channel(ch, true);
757}
758
759static void gk20a_channel_update_runcb_fn(struct work_struct *work)
760{
761 struct channel_gk20a *ch =
762 container_of(work, struct channel_gk20a, update_fn_work);
763 void (*update_fn)(struct channel_gk20a *, void *);
764 void *update_fn_data;
765
766 nvgpu_spinlock_acquire(&ch->update_fn_lock);
767 update_fn = ch->update_fn;
768 update_fn_data = ch->update_fn_data;
769 nvgpu_spinlock_release(&ch->update_fn_lock);
770
771 if (update_fn)
772 update_fn(ch, update_fn_data);
773}
774
775struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
776 void (*update_fn)(struct channel_gk20a *, void *),
777 void *update_fn_data,
778 int runlist_id,
779 bool is_privileged_channel)
780{
781 struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel);
782
783 if (ch) {
784 nvgpu_spinlock_acquire(&ch->update_fn_lock);
785 ch->update_fn = update_fn;
786 ch->update_fn_data = update_fn_data;
787 nvgpu_spinlock_release(&ch->update_fn_lock);
788 }
789
790 return ch;
791}
792
793struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
794 s32 runlist_id,
795 bool is_privileged_channel)
796{
797 struct fifo_gk20a *f = &g->fifo;
798 struct channel_gk20a *ch;
799 struct gk20a_event_id_data *event_id_data, *event_id_data_temp;
800
801 /* compatibility with existing code */
802 if (!gk20a_fifo_is_valid_runlist_id(g, runlist_id)) {
803 runlist_id = gk20a_fifo_get_gr_runlist_id(g);
804 }
805
806 gk20a_dbg_fn("");
807
808 ch = allocate_channel(f);
809 if (ch == NULL) {
810 /* TBD: we want to make this virtualizable */
811 nvgpu_err(g, "out of hw chids");
812 return NULL;
813 }
814
815 trace_gk20a_open_new_channel(ch->chid);
816
817 BUG_ON(ch->g);
818 ch->g = g;
819
820 /* Runlist for the channel */
821 ch->runlist_id = runlist_id;
822
823 /* Channel privilege level */
824 ch->is_privileged_channel = is_privileged_channel;
825
826 if (g->ops.fifo.alloc_inst(g, ch)) {
827 ch->g = NULL;
828 free_channel(f, ch);
829 nvgpu_err(g,
830 "failed to open gk20a channel, out of inst mem");
831 return NULL;
832 }
833
834	/* now the channel is in limbo: off the free list but not yet marked as
835	 * alive and usable (i.e. get-able) */
836
837 ch->pid = current->pid;
838 ch->tgid = current->tgid; /* process granularity for FECS traces */
839
840 /* unhook all events created on this channel */
841 nvgpu_mutex_acquire(&ch->event_id_list_lock);
842 nvgpu_list_for_each_entry_safe(event_id_data, event_id_data_temp,
843 &ch->event_id_list,
844 gk20a_event_id_data,
845 event_id_node) {
846 nvgpu_list_del(&event_id_data->event_id_node);
847 }
848 nvgpu_mutex_release(&ch->event_id_list_lock);
849
850 /* By default, channel is regular (non-TSG) channel */
851 ch->tsgid = NVGPU_INVALID_TSG_ID;
852
853 /* reset timeout counter and update timestamp */
854 ch->timeout_accumulated_ms = 0;
855 ch->timeout_gpfifo_get = 0;
856 /* set gr host default timeout */
857 ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
858 ch->timeout_debug_dump = true;
859 ch->has_timedout = false;
860 ch->wdt_enabled = true;
861 ch->obj_class = 0;
862 ch->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
863 ch->timeslice_us = g->timeslice_low_priority_us;
864#ifdef CONFIG_TEGRA_19x_GPU
865 memset(&ch->t19x, 0, sizeof(struct channel_t19x));
866#endif
867
868
869 /* The channel is *not* runnable at this point. It still needs to have
870 * an address space bound and allocate a gpfifo and grctx. */
871
872 nvgpu_cond_init(&ch->notifier_wq);
873 nvgpu_cond_init(&ch->semaphore_wq);
874
875 ch->update_fn = NULL;
876 ch->update_fn_data = NULL;
877 nvgpu_spinlock_init(&ch->update_fn_lock);
878 INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
879
880 /* Mark the channel alive, get-able, with 1 initial use
881 * references. The initial reference will be decreased in
882 * gk20a_free_channel() */
883 ch->referenceable = true;
884 nvgpu_atomic_set(&ch->ref_count, 1);
885 nvgpu_smp_wmb();
886
887 return ch;
888}
889
890/* allocate private cmd buffer.
891   used for inserting commands before/after user-submitted buffers. */
892static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
893{
894 struct gk20a *g = c->g;
895 struct vm_gk20a *ch_vm = c->vm;
896 struct priv_cmd_queue *q = &c->priv_cmd_q;
897 u32 size;
898 int err = 0;
899
900 /*
901 * Compute the amount of priv_cmdbuf space we need. In general the worst
902 * case is the kernel inserts both a semaphore pre-fence and post-fence.
903 * Any sync-pt fences will take less memory so we can ignore them for
904 * now.
905 *
906 * A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b,
907 * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10
908 * dwords: all the same as an ACQ plus a non-stalling intr which is
909 * another 2 dwords.
910 *
911 * Lastly the number of gpfifo entries per channel is fixed so at most
912 * we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one
913 * userspace entry, and one post-fence entry). Thus the computation is:
914 *
915	 * (gpfifo entry number) * (2 / 3) * (8 + 10) * 4 bytes.
916 */
917 size = roundup_pow_of_two(c->gpfifo.entry_num *
918 2 * 18 * sizeof(u32) / 3);
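	/*
	 * Worked example (hypothetical numbers, not from the original source):
	 * with 1024 gpfifo entries the raw estimate is
	 * 1024 * 2 * 18 * 4 / 3 = 49152 bytes, which roundup_pow_of_two()
	 * turns into a 64 KB priv cmd buffer.
	 */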
919
920 err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem);
921 if (err) {
922 nvgpu_err(g, "%s: memory allocation failed", __func__);
923 goto clean_up;
924 }
925
926 q->size = q->mem.size / sizeof (u32);
927
928 return 0;
929
930clean_up:
931 channel_gk20a_free_priv_cmdbuf(c);
932 return err;
933}
934
935static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
936{
937 struct vm_gk20a *ch_vm = c->vm;
938 struct priv_cmd_queue *q = &c->priv_cmd_q;
939
940 if (q->size == 0)
941 return;
942
943 nvgpu_dma_unmap_free(ch_vm, &q->mem);
944
945 memset(q, 0, sizeof(struct priv_cmd_queue));
946}
947
948/* allocate a cmd buffer with given size. size is number of u32 entries */
949int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
950 struct priv_cmd_entry *e)
951{
952 struct priv_cmd_queue *q = &c->priv_cmd_q;
953 u32 free_count;
954 u32 size = orig_size;
955
956 gk20a_dbg_fn("size %d", orig_size);
957
958 if (!e) {
959 nvgpu_err(c->g,
960 "ch %d: priv cmd entry is null",
961 c->chid);
962 return -EINVAL;
963 }
964
965	/* if free space at the end is less than requested, increase the size
966	 * so that the allocated space effectively starts from the beginning. */
967 if (q->put + size > q->size)
968 size = orig_size + (q->size - q->put);
969
970 gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
971 c->chid, q->get, q->put);
972
973 free_count = (q->size - (q->put - q->get) - 1) % q->size;
974
975 if (size > free_count)
976 return -EAGAIN;
977
978 e->size = orig_size;
979 e->mem = &q->mem;
980
981	/* if we have increased size to skip free space at the end, set put
982	   to the beginning of the cmd buffer (0) + size */
983 if (size != orig_size) {
984 e->off = 0;
985 e->gva = q->mem.gpu_va;
986 q->put = orig_size;
987 } else {
988 e->off = q->put;
989 e->gva = q->mem.gpu_va + q->put * sizeof(u32);
990 q->put = (q->put + orig_size) & (q->size - 1);
991 }
992
993 /* we already handled q->put + size > q->size so BUG_ON this */
994 BUG_ON(q->put > q->size);
995
996 /*
997 * commit the previous writes before making the entry valid.
998 * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf().
999 */
1000 nvgpu_smp_wmb();
1001
1002 e->valid = true;
1003 gk20a_dbg_fn("done");
1004
1005 return 0;
1006}
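/*
 * Illustrative wrap-around example for the allocator above (hypothetical
 * numbers): with q->size = 16 words, q->get = 8, q->put = 14 and a request
 * of orig_size = 4, the tail only has 2 words left, so size grows to
 * 4 + (16 - 14) = 6. free_count = (16 - (14 - 8) - 1) % 16 = 9, which is
 * enough, so the entry is placed at offset 0 and q->put becomes 4.
 */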
1007
1008/* Don't call this to free an explicit cmd entry.
1009 * It doesn't update priv_cmd_queue get/put */
1010void free_priv_cmdbuf(struct channel_gk20a *c,
1011 struct priv_cmd_entry *e)
1012{
1013 if (channel_gk20a_is_prealloc_enabled(c))
1014 memset(e, 0, sizeof(struct priv_cmd_entry));
1015 else
1016 nvgpu_kfree(c->g, e);
1017}
1018
1019int channel_gk20a_alloc_job(struct channel_gk20a *c,
1020 struct channel_gk20a_job **job_out)
1021{
1022 int err = 0;
1023
1024 if (channel_gk20a_is_prealloc_enabled(c)) {
1025 int put = c->joblist.pre_alloc.put;
1026 int get = c->joblist.pre_alloc.get;
1027
1028 /*
1029 * ensure all subsequent reads happen after reading get.
1030 * see corresponding nvgpu_smp_wmb in
1031 * gk20a_channel_clean_up_jobs()
1032 */
1033 nvgpu_smp_rmb();
1034
1035 if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
1036 *job_out = &c->joblist.pre_alloc.jobs[put];
1037 else {
1038 nvgpu_warn(c->g,
1039 "out of job ringbuffer space");
1040 err = -EAGAIN;
1041 }
1042 } else {
1043 *job_out = nvgpu_kzalloc(c->g,
1044 sizeof(struct channel_gk20a_job));
1045 if (!*job_out)
1046 err = -ENOMEM;
1047 }
1048
1049 return err;
1050}
1051
1052void channel_gk20a_free_job(struct channel_gk20a *c,
1053 struct channel_gk20a_job *job)
1054{
1055 /*
1056 * In case of pre_allocated jobs, we need to clean out
1057 * the job but maintain the pointers to the priv_cmd_entry,
1058 * since they're inherently tied to the job node.
1059 */
1060 if (channel_gk20a_is_prealloc_enabled(c)) {
1061 struct priv_cmd_entry *wait_cmd = job->wait_cmd;
1062 struct priv_cmd_entry *incr_cmd = job->incr_cmd;
1063 memset(job, 0, sizeof(*job));
1064 job->wait_cmd = wait_cmd;
1065 job->incr_cmd = incr_cmd;
1066 } else
1067 nvgpu_kfree(c->g, job);
1068}
1069
1070void channel_gk20a_joblist_lock(struct channel_gk20a *c)
1071{
1072 if (channel_gk20a_is_prealloc_enabled(c))
1073 nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
1074 else
1075 nvgpu_spinlock_acquire(&c->joblist.dynamic.lock);
1076}
1077
1078void channel_gk20a_joblist_unlock(struct channel_gk20a *c)
1079{
1080 if (channel_gk20a_is_prealloc_enabled(c))
1081 nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
1082 else
1083 nvgpu_spinlock_release(&c->joblist.dynamic.lock);
1084}
1085
1086static struct channel_gk20a_job *channel_gk20a_joblist_peek(
1087 struct channel_gk20a *c)
1088{
1089 int get;
1090 struct channel_gk20a_job *job = NULL;
1091
1092 if (channel_gk20a_is_prealloc_enabled(c)) {
1093 if (!channel_gk20a_joblist_is_empty(c)) {
1094 get = c->joblist.pre_alloc.get;
1095 job = &c->joblist.pre_alloc.jobs[get];
1096 }
1097 } else {
1098 if (!nvgpu_list_empty(&c->joblist.dynamic.jobs))
1099 job = nvgpu_list_first_entry(&c->joblist.dynamic.jobs,
1100 channel_gk20a_job, list);
1101 }
1102
1103 return job;
1104}
1105
1106static void channel_gk20a_joblist_add(struct channel_gk20a *c,
1107 struct channel_gk20a_job *job)
1108{
1109 if (channel_gk20a_is_prealloc_enabled(c)) {
1110 c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) %
1111 (c->joblist.pre_alloc.length);
1112 } else {
1113 nvgpu_list_add_tail(&job->list, &c->joblist.dynamic.jobs);
1114 }
1115}
1116
1117static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
1118 struct channel_gk20a_job *job)
1119{
1120 if (channel_gk20a_is_prealloc_enabled(c)) {
1121 c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) %
1122 (c->joblist.pre_alloc.length);
1123 } else {
1124 nvgpu_list_del(&job->list);
1125 }
1126}
1127
1128bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c)
1129{
1130 if (channel_gk20a_is_prealloc_enabled(c)) {
1131 int get = c->joblist.pre_alloc.get;
1132 int put = c->joblist.pre_alloc.put;
1133 return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length));
1134 }
1135
1136 return nvgpu_list_empty(&c->joblist.dynamic.jobs);
1137}
1138
1139bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
1140{
1141 bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
1142
1143 nvgpu_smp_rmb();
1144 return pre_alloc_enabled;
1145}
1146
1147static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
1148 unsigned int num_jobs)
1149{
1150 unsigned int i;
1151 int err;
1152 size_t size;
1153 struct priv_cmd_entry *entries = NULL;
1154
1155 if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs)
1156 return -EINVAL;
1157
1158 /*
1159 * pre-allocate the job list.
1160	 * since vmalloc takes an unsigned long, we need
1161 * to make sure we don't hit an overflow condition
1162 */
1163 size = sizeof(struct channel_gk20a_job);
1164 if (num_jobs <= ULONG_MAX / size)
1165 c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g,
1166 num_jobs * size);
1167 if (!c->joblist.pre_alloc.jobs) {
1168 err = -ENOMEM;
1169 goto clean_up;
1170 }
1171
1172 /*
1173 * pre-allocate 2x priv_cmd_entry for each job up front.
1174	 * since vmalloc takes an unsigned long, we need
1175 * to make sure we don't hit an overflow condition
1176 */
1177 size = sizeof(struct priv_cmd_entry);
1178 if (num_jobs <= ULONG_MAX / (size << 1))
1179 entries = nvgpu_vzalloc(c->g, (num_jobs << 1) * size);
1180 if (!entries) {
1181 err = -ENOMEM;
1182 goto clean_up_joblist;
1183 }
1184
1185 for (i = 0; i < num_jobs; i++) {
1186 c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
1187 c->joblist.pre_alloc.jobs[i].incr_cmd =
1188 &entries[i + num_jobs];
1189 }
1190
1191 /* pre-allocate a fence pool */
1192 err = gk20a_alloc_fence_pool(c, num_jobs);
1193 if (err)
1194 goto clean_up_priv_cmd;
1195
1196 c->joblist.pre_alloc.length = num_jobs;
1197
1198 /*
1199 * commit the previous writes before setting the flag.
1200 * see corresponding nvgpu_smp_rmb in
1201 * channel_gk20a_is_prealloc_enabled()
1202 */
1203 nvgpu_smp_wmb();
1204 c->joblist.pre_alloc.enabled = true;
1205
1206 return 0;
1207
1208clean_up_priv_cmd:
1209 nvgpu_vfree(c->g, entries);
1210clean_up_joblist:
1211 nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
1212clean_up:
1213 memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
1214 return err;
1215}
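/*
 * Layout sketch for the pre-allocated entries above (illustrative): with
 * num_jobs = 4, a single vzalloc'd array of 8 priv_cmd_entry structs is
 * split so that jobs[0..3].wait_cmd point at entries[0..3] and
 * jobs[0..3].incr_cmd point at entries[4..7]. Freeing jobs[0].wait_cmd in
 * channel_gk20a_free_prealloc_resources() therefore frees the whole array.
 */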
1216
1217static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
1218{
1219 nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd);
1220 nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
1221 gk20a_free_fence_pool(c);
1222
1223 /*
1224 * commit the previous writes before disabling the flag.
1225 * see corresponding nvgpu_smp_rmb in
1226 * channel_gk20a_is_prealloc_enabled()
1227 */
1228 nvgpu_smp_wmb();
1229 c->joblist.pre_alloc.enabled = false;
1230}
1231
1232int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
1233 unsigned int num_entries,
1234 unsigned int num_inflight_jobs,
1235 u32 flags)
1236{
1237 struct gk20a *g = c->g;
1238 struct vm_gk20a *ch_vm;
1239 u32 gpfifo_size, gpfifo_entry_size;
1240 int err = 0;
1241 unsigned long acquire_timeout;
1242
1243 gpfifo_size = num_entries;
1244 gpfifo_entry_size = nvgpu_get_gpfifo_entry_size();
1245
1246 if (flags & NVGPU_GPFIFO_FLAGS_SUPPORT_VPR)
1247 c->vpr = true;
1248
1249 if (flags & NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC) {
1250 nvgpu_rwsem_down_read(&g->deterministic_busy);
1251 /*
1252 * Railgating isn't deterministic; instead of disallowing
1253 * railgating globally, take a power refcount for this
1254 * channel's lifetime. The gk20a_idle() pair for this happens
1255 * when the channel gets freed.
1256 *
1257 * Deterministic flag and this busy must be atomic within the
1258 * busy lock.
1259 */
1260 err = gk20a_busy(g);
1261 if (err) {
1262 nvgpu_rwsem_up_read(&g->deterministic_busy);
1263 return err;
1264 }
1265
1266 c->deterministic = true;
1267 nvgpu_rwsem_up_read(&g->deterministic_busy);
1268 }
1269
1270 /* an address space needs to have been bound at this point. */
1271 if (!gk20a_channel_as_bound(c)) {
1272 nvgpu_err(g,
1273 "not bound to an address space at time of gpfifo"
1274 " allocation.");
1275 err = -EINVAL;
1276 goto clean_up_idle;
1277 }
1278 ch_vm = c->vm;
1279
1280 if (c->gpfifo.mem.size) {
1281 nvgpu_err(g, "channel %d :"
1282 "gpfifo already allocated", c->chid);
1283 err = -EEXIST;
1284 goto clean_up_idle;
1285 }
1286
1287 err = nvgpu_dma_alloc_map_sys(ch_vm,
1288 gpfifo_size * gpfifo_entry_size,
1289 &c->gpfifo.mem);
1290 if (err) {
1291 nvgpu_err(g, "%s: memory allocation failed", __func__);
1292 goto clean_up;
1293 }
1294
1295 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
1296 c->gpfifo.pipe = nvgpu_big_malloc(g,
1297 gpfifo_size * gpfifo_entry_size);
1298 if (!c->gpfifo.pipe) {
1299 err = -ENOMEM;
1300 goto clean_up_unmap;
1301 }
1302 }
1303
1304 c->gpfifo.entry_num = gpfifo_size;
1305 c->gpfifo.get = c->gpfifo.put = 0;
1306
1307 gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1308 c->chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);
1309
1310 g->ops.fifo.setup_userd(c);
1311
1312 if (!g->aggressive_sync_destroy_thresh) {
1313 nvgpu_mutex_acquire(&c->sync_lock);
1314 c->sync = gk20a_channel_sync_create(c);
1315 if (!c->sync) {
1316 err = -ENOMEM;
1317 nvgpu_mutex_release(&c->sync_lock);
1318 goto clean_up_unmap;
1319 }
1320 nvgpu_mutex_release(&c->sync_lock);
1321
1322 if (g->ops.fifo.resetup_ramfc) {
1323 err = g->ops.fifo.resetup_ramfc(c);
1324 if (err)
1325 goto clean_up_sync;
1326 }
1327 }
1328
1329 if (!c->g->timeouts_enabled || !c->wdt_enabled)
1330 acquire_timeout = 0;
1331 else
1332 acquire_timeout = gk20a_get_channel_watchdog_timeout(c);
1333
1334 err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
1335 c->gpfifo.entry_num,
1336 acquire_timeout, flags);
1337 if (err)
1338 goto clean_up_sync;
1339
1340 /* TBD: setup engine contexts */
1341
1342 if (num_inflight_jobs) {
1343 err = channel_gk20a_prealloc_resources(c,
1344 num_inflight_jobs);
1345 if (err)
1346 goto clean_up_sync;
1347 }
1348
1349 err = channel_gk20a_alloc_priv_cmdbuf(c);
1350 if (err)
1351 goto clean_up_prealloc;
1352
1353 err = channel_gk20a_update_runlist(c, true);
1354 if (err)
1355 goto clean_up_priv_cmd;
1356
1357 g->ops.fifo.bind_channel(c);
1358
1359 gk20a_dbg_fn("done");
1360 return 0;
1361
1362clean_up_priv_cmd:
1363 channel_gk20a_free_priv_cmdbuf(c);
1364clean_up_prealloc:
1365 if (num_inflight_jobs)
1366 channel_gk20a_free_prealloc_resources(c);
1367clean_up_sync:
1368 if (c->sync) {
1369 gk20a_channel_sync_destroy(c->sync);
1370 c->sync = NULL;
1371 }
1372clean_up_unmap:
1373 nvgpu_big_free(g, c->gpfifo.pipe);
1374 nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem);
1375clean_up:
1376 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1377clean_up_idle:
1378 if (c->deterministic) {
1379 nvgpu_rwsem_down_read(&g->deterministic_busy);
1380 gk20a_idle(g);
1381 c->deterministic = false;
1382 nvgpu_rwsem_up_read(&g->deterministic_busy);
1383 }
1384 nvgpu_err(g, "fail");
1385 return err;
1386}
1387
1388/* Call this periodically to update gp_get and see how the gpfifo is draining. */
1389static inline u32 update_gp_get(struct gk20a *g,
1390 struct channel_gk20a *c)
1391{
1392 u32 new_get = g->ops.fifo.userd_gp_get(g, c);
1393
1394 if (new_get < c->gpfifo.get)
1395 c->gpfifo.wrap = !c->gpfifo.wrap;
1396 c->gpfifo.get = new_get;
1397 return new_get;
1398}
1399
1400u32 nvgpu_gp_free_count(struct channel_gk20a *c)
1401{
1402 return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1403 c->gpfifo.entry_num;
1404}
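/*
 * Illustrative example (hypothetical numbers): with entry_num = 32,
 * put = 20 and get = 8, the count above is (32 - 12 - 1) % 32 = 19 free
 * slots; one slot is always kept unused so that a full gpfifo can be
 * distinguished from an empty one.
 */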
1405
1406bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1407 u32 timeout_delta_ms, bool *progress)
1408{
1409 u32 gpfifo_get = update_gp_get(ch->g, ch);
1410
1411	/* Count consecutive timeout ISRs */
1412 if (gpfifo_get == ch->timeout_gpfifo_get) {
1413 /* we didn't advance since previous channel timeout check */
1414 ch->timeout_accumulated_ms += timeout_delta_ms;
1415 *progress = false;
1416 } else {
1417 /* first timeout isr encountered */
1418 ch->timeout_accumulated_ms = timeout_delta_ms;
1419 *progress = true;
1420 }
1421
1422 ch->timeout_gpfifo_get = gpfifo_get;
1423
1424 return ch->g->timeouts_enabled &&
1425 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1426}
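/*
 * Illustrative accumulation sketch (hypothetical numbers): if the timeout
 * ISR fires with timeout_delta_ms = 100 and gp_get never advances, then
 * with timeout_ms_max = 3000 the function above starts returning true after
 * roughly 30 consecutive calls; any progress in between resets the
 * accumulated time back to the latest delta.
 */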
1427
1428static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch)
1429{
1430 return ch->g->ch_wdt_timeout_ms;
1431}
1432
1433u32 nvgpu_get_gp_free_count(struct channel_gk20a *c)
1434{
1435 update_gp_get(c->g, c);
1436 return nvgpu_gp_free_count(c);
1437}
1438
1439static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
1440{
1441 ch->timeout.gp_get = ch->g->ops.fifo.userd_gp_get(ch->g, ch);
1442 ch->timeout.pb_get = ch->g->ops.fifo.userd_pb_get(ch->g, ch);
1443 ch->timeout.running = true;
1444 nvgpu_timeout_init(ch->g, &ch->timeout.timer,
1445 gk20a_get_channel_watchdog_timeout(ch),
1446 NVGPU_TIMER_CPU_TIMER);
1447}
1448
1449/**
1450 * Start a timeout counter (watchdog) on this channel.
1451 *
1452 * Trigger a watchdog to recover the channel after the per-platform timeout
1453 * duration (but strictly no earlier) if the channel hasn't advanced within
1454 * that time.
1455 *
1456 * If the timeout is already running, do nothing. This should be called when
1457 * new jobs are submitted. The timeout will stop when the last tracked job
1458 * finishes, making the channel idle.
1459 *
1460 * The channel's gpfifo read pointer will be used to determine whether the job
1461 * has actually gotten stuck at that time. After the timeout duration has
1462 * expired, a worker thread will consider the channel stuck and recover it.
1463 */
1464static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
1465{
1466 if (!ch->g->timeouts_enabled || !gk20a_get_channel_watchdog_timeout(ch))
1467 return;
1468
1469 if (!ch->wdt_enabled)
1470 return;
1471
1472 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1473
1474 if (ch->timeout.running) {
1475 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1476 return;
1477 }
1478 __gk20a_channel_timeout_start(ch);
1479 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1480}
1481
1482/**
1483 * Stop a running timeout counter (watchdog) on this channel.
1484 *
1485 * Make the watchdog consider the channel not running, so that it won't get
1486 * recovered even if no progress is detected. Progress is not tracked if the
1487 * watchdog is turned off.
1488 *
1489 * No guarantees are made about concurrent execution of the timeout handler.
1490 * (This should be called from an update handler running in the same thread
1491 * with the watchdog.)
1492 */
1493static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
1494{
1495 bool was_running;
1496
1497 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1498 was_running = ch->timeout.running;
1499 ch->timeout.running = false;
1500 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1501 return was_running;
1502}
1503
1504/**
1505 * Continue a previously stopped timeout
1506 *
1507 * Enable the timeout again but don't reinitialize its timer.
1508 *
1509 * No guarantees are made about concurrent execution of the timeout handler.
1510 * (This should be called from an update handler running in the same thread
1511 * with the watchdog.)
1512 */
1513static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
1514{
1515 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1516 ch->timeout.running = true;
1517 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1518}
1519
1520/**
1521 * Rewind the timeout on each non-dormant channel.
1522 *
1523 * Reschedule the timeout of each active channel for which timeouts are running
1524 * as if something had just happened on each channel. This should be
1525 * called when a global hang is detected that could cause a false positive on
1526 * other innocent channels.
1527 */
1528void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
1529{
1530 struct fifo_gk20a *f = &g->fifo;
1531 u32 chid;
1532
1533 for (chid = 0; chid < f->num_channels; chid++) {
1534 struct channel_gk20a *ch = &f->channel[chid];
1535
1536 if (!gk20a_channel_get(ch))
1537 continue;
1538
1539 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1540 if (ch->timeout.running)
1541 __gk20a_channel_timeout_start(ch);
1542 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1543
1544 gk20a_channel_put(ch);
1545 }
1546}
1547
1548/**
1549 * Check if a timed out channel has hung and recover it if it has.
1550 *
1551 * Test if this channel has really gotten stuck at this point (should be called
1552 * when the watchdog timer has expired) by checking if its gp_get has advanced
1553 * or not. If no gp_get action has happened since the watchdog was started,
1554 * force-reset the channel.
1555 *
1556 * The gpu is implicitly on at this point, because the watchdog can only run on
1557 * channels that have submitted jobs pending for cleanup.
1558 */
1559static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
1560{
1561 struct gk20a *g = ch->g;
1562 u32 gp_get;
1563 u32 new_gp_get;
1564 u64 pb_get;
1565 u64 new_pb_get;
1566
1567 gk20a_dbg_fn("");
1568
1569 /* Get status and clear the timer */
1570 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1571 gp_get = ch->timeout.gp_get;
1572 pb_get = ch->timeout.pb_get;
1573 ch->timeout.running = false;
1574 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1575
1576 new_gp_get = g->ops.fifo.userd_gp_get(ch->g, ch);
1577 new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch);
1578
1579 if (new_gp_get != gp_get || new_pb_get != pb_get) {
1580 /* Channel has advanced, reschedule */
1581 gk20a_channel_timeout_start(ch);
1582 return;
1583 }
1584
1585 nvgpu_err(g, "Job on channel %d timed out",
1586 ch->chid);
1587
1588 gk20a_debug_dump(g);
1589 gk20a_gr_debug_dump(g);
1590
1591 g->ops.fifo.force_reset_ch(ch,
1592 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
1593}
1594
1595/**
1596 * Test if the per-channel timeout has expired and handle it in that case.
1597 *
1598 * Each channel has an expiration time based watchdog. The timer is
1599 * (re)initialized in two situations: when a new job is submitted on an idle
1600 * channel and when the timeout is checked but progress is detected.
1601 *
1602 * Watchdog timeout does not yet necessarily mean a stuck channel so this may
1603 * or may not cause recovery.
1604 *
1605 * The timeout is stopped (disabled) after the last job in a row finishes
1606 * making the channel idle.
1607 */
1608static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
1609{
1610 bool timed_out;
1611
1612 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1613 timed_out = ch->timeout.running &&
1614 nvgpu_timeout_peek_expired(&ch->timeout.timer);
1615 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1616
1617 if (timed_out)
1618 gk20a_channel_timeout_handler(ch);
1619}
1620
1621/**
1622 * Loop every living channel, check timeouts and handle stuck channels.
1623 */
1624static void gk20a_channel_poll_timeouts(struct gk20a *g)
1625{
1626 unsigned int chid;
1627
1628
1629 for (chid = 0; chid < g->fifo.num_channels; chid++) {
1630 struct channel_gk20a *ch = &g->fifo.channel[chid];
1631
1632 if (gk20a_channel_get(ch)) {
1633 gk20a_channel_timeout_check(ch);
1634 gk20a_channel_put(ch);
1635 }
1636 }
1637}
1638
1639/*
1640 * Process one scheduled work item for this channel. Currently, the only thing
1641 * the worker does is job cleanup handling.
1642 */
1643static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch)
1644{
1645 gk20a_dbg_fn("");
1646
1647 gk20a_channel_clean_up_jobs(ch, true);
1648
1649 /* ref taken when enqueued */
1650 gk20a_channel_put(ch);
1651}
1652
1653/**
1654 * Tell the worker that one more work item needs to be done.
1655 *
1656 * Increase the work counter to synchronize the worker with the new work. Wake
1657 * up the worker. If the worker was already running, it will handle this work
1658 * before going to sleep.
1659 */
1660static int __gk20a_channel_worker_wakeup(struct gk20a *g)
1661{
1662 int put;
1663
1664 gk20a_dbg_fn("");
1665
1666 /*
1667 * Currently, the only work type is associated with a lock, which deals
1668 * with any necessary barriers. If a work type with no locking were
1669 * added, a nvgpu_smp_wmb() would be needed here. See
1670 * ..worker_pending() for a pair.
1671 */
1672
1673 put = nvgpu_atomic_inc_return(&g->channel_worker.put);
1674 nvgpu_cond_signal_interruptible(&g->channel_worker.wq);
1675
1676 return put;
1677}
1678
1679/**
1680 * Test if there is some work pending.
1681 *
1682 * This is the pair of __gk20a_channel_worker_wakeup, to be called from the
1683 * worker. The worker has an internal work counter which is incremented once
1684 * per finished work item. This is compared with the number of queued jobs,
1685 * which may be channels on the items list or any other type of work.
1686 */
1687static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
1688{
1689 bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get;
1690
1691 /*
1692 * This would be the place for a nvgpu_smp_rmb() pairing
1693 * a nvgpu_smp_wmb() for a wakeup if we had any work with
1694 * no implicit barriers caused by locking.
1695 */
1696
1697 return pending;
1698}
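/*
 * Sketch of the put/get protocol used above (illustrative pseudo-code, an
 * assumption about intended use, not part of the original file):
 *
 *	enqueue path:                     poll thread:
 *	  add item to items list;           while (put != get) {
 *	  put = atomic_inc(put);              take one item off the list;
 *	  signal worker->wq;                   process it;
 *	                                       get++;
 *	                                     }
 *
 * 'put' is the shared atomic counter; 'get' is private to the poll thread.
 */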
1699
1700/**
1701 * Process the queued works for the worker thread serially.
1702 *
1703 * Flush all the work items in the queue one by one. This may block timeout
1704 * handling for a short while, as these are serialized.
1705 */
1706static void gk20a_channel_worker_process(struct gk20a *g, int *get)
1707{
1708
1709 while (__gk20a_channel_worker_pending(g, *get)) {
1710 struct channel_gk20a *ch = NULL;
1711
1712 /*
1713 * If a channel is on the list, it's guaranteed to be handled
1714 * eventually just once. However, the opposite is not true. A
1715	 * channel may be under processing whether or not it is on the list.
1716 *
1717 * With this, processing channel works should be conservative
1718 * as follows: it's always safe to look at a channel found in
1719 * the list, and if someone enqueues the channel, it will be
1720 * handled eventually, even if it's being handled at the same
1721 * time. A channel is on the list only once; multiple calls to
1722 * enqueue are harmless.
1723 */
1724 nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
1725 if (!nvgpu_list_empty(&g->channel_worker.items)) {
1726 ch = nvgpu_list_first_entry(&g->channel_worker.items,
1727 channel_gk20a,
1728 worker_item);
1729 nvgpu_list_del(&ch->worker_item);
1730 }
1731 nvgpu_spinlock_release(&g->channel_worker.items_lock);
1732
1733 if (!ch) {
1734 /*
1735			 * Woke up for some other reason, but currently the
1736			 * only expected reason is a channel added to the
1737			 * items list, so warn and ack the message.
1738 */
1739 nvgpu_warn(g, "Spurious worker event!");
1740 ++*get;
1741 break;
1742 }
1743
1744 gk20a_channel_worker_process_ch(ch);
1745 ++*get;
1746 }
1747}
1748
1749/*
1750 * Look at channel states periodically, until canceled. Abort timed out
1751 * channels serially. Process all work items found in the queue.
1752 */
1753static int gk20a_channel_poll_worker(void *arg)
1754{
1755 struct gk20a *g = (struct gk20a *)arg;
1756 struct gk20a_channel_worker *worker = &g->channel_worker;
1757 unsigned long watchdog_interval = 100; /* milliseconds */
1758 struct nvgpu_timeout timeout;
1759 int get = 0;
1760
1761 gk20a_dbg_fn("");
1762
1763 nvgpu_timeout_init(g, &timeout, watchdog_interval,
1764 NVGPU_TIMER_CPU_TIMER);
1765 while (!nvgpu_thread_should_stop(&worker->poll_task)) {
1766 int ret;
1767
1768 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
1769 &worker->wq,
1770 __gk20a_channel_worker_pending(g, get),
1771 watchdog_interval) > 0;
1772
1773 if (ret == 0)
1774 gk20a_channel_worker_process(g, &get);
1775
1776 if (nvgpu_timeout_peek_expired(&timeout)) {
1777 gk20a_channel_poll_timeouts(g);
1778 nvgpu_timeout_init(g, &timeout, watchdog_interval,
1779 NVGPU_TIMER_CPU_TIMER);
1780 }
1781 }
1782 return 0;
1783}
1784
1785static int __nvgpu_channel_worker_start(struct gk20a *g)
1786{
1787 char thread_name[64];
1788 int err = 0;
1789
1790 if (nvgpu_thread_is_running(&g->channel_worker.poll_task))
1791 return err;
1792
1793 nvgpu_mutex_acquire(&g->channel_worker.start_lock);
1794
1795 /*
1796	 * We don't want to grab a mutex on every channel update, so we check
1797	 * again whether the worker has been initialized before creating a new thread.
1798 */
1799
1800 /*
1801	 * Mutexes have implicit barriers, so there is no risk of a thread
1802	 * having a stale copy of the poll_task variable, as the call to
1803	 * thread_is_running is volatile.
1804 */
1805
1806 if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) {
1807 nvgpu_mutex_release(&g->channel_worker.start_lock);
1808 return err;
1809 }
1810
1811 snprintf(thread_name, sizeof(thread_name),
1812 "nvgpu_channel_poll_%s", g->name);
1813
1814 err = nvgpu_thread_create(&g->channel_worker.poll_task, g,
1815 gk20a_channel_poll_worker, thread_name);
1816
1817 nvgpu_mutex_release(&g->channel_worker.start_lock);
1818 return err;
1819}
1820/**
1821 * Initialize the channel worker's metadata and start the background thread.
1822 */
1823int nvgpu_channel_worker_init(struct gk20a *g)
1824{
1825 int err;
1826
1827 nvgpu_atomic_set(&g->channel_worker.put, 0);
1828 nvgpu_cond_init(&g->channel_worker.wq);
1829 nvgpu_init_list_node(&g->channel_worker.items);
1830 nvgpu_spinlock_init(&g->channel_worker.items_lock);
1831 err = nvgpu_mutex_init(&g->channel_worker.start_lock);
1832 if (err)
1833 goto error_check;
1834
1835 err = __nvgpu_channel_worker_start(g);
1836error_check:
1837 if (err) {
1838 nvgpu_err(g, "failed to start channel poller thread");
1839 return err;
1840 }
1841 return 0;
1842}
1843
1844void nvgpu_channel_worker_deinit(struct gk20a *g)
1845{
1846 nvgpu_mutex_acquire(&g->channel_worker.start_lock);
1847 nvgpu_thread_stop(&g->channel_worker.poll_task);
1848 nvgpu_mutex_release(&g->channel_worker.start_lock);
1849}
1850
1851/**
1852 * Append a channel to the worker's list, if not there already.
1853 *
1854 * The worker thread processes work items (channels in its work list) and polls
1855 * for other things. This adds @ch to the end of the list and wakes the worker
1856 * up immediately. If the channel already existed in the list, it's not added,
1857 * because in that case it has been scheduled already but has not yet been
1858 * processed.
1859 */
1860static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
1861{
1862 struct gk20a *g = ch->g;
1863
1864 gk20a_dbg_fn("");
1865
1866 /*
1867 * Warn if worker thread cannot run
1868 */
1869 if (WARN_ON(__nvgpu_channel_worker_start(g))) {
1870 nvgpu_warn(g, "channel worker cannot run!");
1871 return;
1872 }
1873
1874 /*
1875 * This ref is released when the item gets processed. The caller should
1876 * hold one ref already, so this normally shouldn't fail, but the channel
1877 * could end up being freed between the time the caller got its reference
1878 * and the time we end up here (e.g., if the client got killed); if so,
1879 * just return.
1880 */
1881 if (!gk20a_channel_get(ch)) {
1882 nvgpu_info(g, "cannot get ch ref for worker!");
1883 return;
1884 }
1885
1886 nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
1887 if (!nvgpu_list_empty(&ch->worker_item)) {
1888 /*
1889 * Already queued, so will get processed eventually.
1890 * The worker is probably awake already.
1891 */
1892 nvgpu_spinlock_release(&g->channel_worker.items_lock);
1893 gk20a_channel_put(ch);
1894 return;
1895 }
1896 nvgpu_list_add_tail(&ch->worker_item, &g->channel_worker.items);
1897 nvgpu_spinlock_release(&g->channel_worker.items_lock);
1898
1899 __gk20a_channel_worker_wakeup(g);
1900}
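/*
 * Reference contract (sketch, consistent with the comment above): the caller
 * already holds a channel reference; the enqueue takes one more that the
 * worker releases when the item is processed. Illustrative caller:
 *
 *	if (gk20a_channel_get(ch)) {
 *		gk20a_channel_worker_enqueue(ch);
 *		gk20a_channel_put(ch);
 *	}
 */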
1901
1902int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
1903{
1904 struct priv_cmd_queue *q = &c->priv_cmd_q;
1905 struct gk20a *g = c->g;
1906
1907 if (!e)
1908 return 0;
1909
1910 if (e->valid) {
1911 /* read the entry's valid flag before reading its contents */
1912 nvgpu_smp_rmb();
1913 if ((q->get != e->off) && e->off != 0)
1914 nvgpu_err(g, "requests out-of-order, ch=%d",
1915 c->chid);
1916 q->get = e->off + e->size;
1917 }
1918
1919 free_priv_cmdbuf(c, e);
1920
1921 return 0;
1922}
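/*
 * The private command queue is circular and entries are expected to retire in
 * allocation order: freeing an entry advances q->get to just past it
 * (e->off + e->size), and an entry whose offset matches neither q->get nor 0
 * (the wrap-around case) is reported as out-of-order. For example, with entry
 * A at offset 0, size 4, and entry B at offset 4, size 8, freeing A sets
 * get = 4 and freeing B then sets get = 12.
 */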
1923
1924int gk20a_channel_add_job(struct channel_gk20a *c,
1925 struct channel_gk20a_job *job,
1926 bool skip_buffer_refcounting)
1927{
1928 struct vm_gk20a *vm = c->vm;
1929 struct nvgpu_mapped_buf **mapped_buffers = NULL;
1930 int err = 0, num_mapped_buffers = 0;
1931 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
1932
1933 if (!skip_buffer_refcounting) {
1934 err = nvgpu_vm_get_buffers(vm, &mapped_buffers,
1935 &num_mapped_buffers);
1936 if (err)
1937 return err;
1938 }
1939
1940 /*
1941 * Ref to hold the channel open during the job lifetime. This is
1942 * released by job cleanup launched via syncpt or sema interrupt.
1943 */
1944 c = gk20a_channel_get(c);
1945
1946 if (c) {
1947 job->num_mapped_buffers = num_mapped_buffers;
1948 job->mapped_buffers = mapped_buffers;
1949
1950 gk20a_channel_timeout_start(c);
1951
1952 if (!pre_alloc_enabled)
1953 channel_gk20a_joblist_lock(c);
1954
1955 /*
1956 * Ensure all pending writes complete before adding to the
1957 * list. See the corresponding nvgpu_smp_rmb() in
1958 * gk20a_channel_clean_up_jobs() and
1959 * gk20a_channel_abort_clean_up().
1960 */
1961 nvgpu_smp_wmb();
1962 channel_gk20a_joblist_add(c, job);
1963
1964 if (!pre_alloc_enabled)
1965 channel_gk20a_joblist_unlock(c);
1966 } else {
1967 err = -ETIMEDOUT;
1968 goto err_put_buffers;
1969 }
1970
1971 return 0;
1972
1973err_put_buffers:
1974 nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1975
1976 return err;
1977}
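/*
 * Publication pattern used above (sketch): the producer fills in the job,
 * issues nvgpu_smp_wmb(), then links the job into the list; the consumer in
 * gk20a_channel_clean_up_jobs() mirrors this with a peek, then
 * nvgpu_smp_rmb(), then the reads of the job fields:
 *
 *	producer (add_job)              consumer (clean_up_jobs)
 *	  job->... = ...;                 job = channel_gk20a_joblist_peek(c);
 *	  nvgpu_smp_wmb();                nvgpu_smp_rmb();
 *	  channel_gk20a_joblist_add();    read job->...;
 */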
1978
1979/**
1980 * Clean up job resources for further jobs to use.
1981 * @clean_all: If true, process as many jobs as possible, otherwise just one.
1982 *
1983 * Loop all jobs from the joblist until a pending job is found, or just one if
1984 * clean_all is not set. Pending jobs are detected from the job's post fence,
1985 * so this is only done for jobs that have job tracking resources. Free all
1986 * per-job memory for completed jobs; in case of preallocated resources, this
1987 * opens up slots for new jobs to be submitted.
1988 */
1989void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
1990 bool clean_all)
1991{
1992 struct vm_gk20a *vm;
1993 struct channel_gk20a_job *job;
1994 struct gk20a *g;
1995 int job_finished = 0;
1996 bool watchdog_on = false;
1997
1998 c = gk20a_channel_get(c);
1999 if (!c)
2000 return;
2001
2002 if (!c->g->power_on) { /* shutdown case */
2003 gk20a_channel_put(c);
2004 return;
2005 }
2006
2007 vm = c->vm;
2008 g = c->g;
2009
2010 /*
2011 * If !clean_all, we're in a condition where the watchdog isn't
2012 * supported anyway (this would be a no-op).
2013 */
2014 if (clean_all)
2015 watchdog_on = gk20a_channel_timeout_stop(c);
2016
2017 /* Synchronize with abort cleanup that needs the jobs. */
2018 nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
2019
2020 while (1) {
2021 bool completed;
2022
2023 channel_gk20a_joblist_lock(c);
2024 if (channel_gk20a_joblist_is_empty(c)) {
2025 /*
2026 * No jobs in flight, timeout will remain stopped until
2027 * new jobs are submitted.
2028 */
2029 channel_gk20a_joblist_unlock(c);
2030 break;
2031 }
2032
2033 /*
2034 * Ensure that all subsequent reads occur after we have checked
2035 * that the node is valid. See the corresponding nvgpu_smp_wmb()
2036 * in gk20a_channel_add_job().
2037 */
2038 nvgpu_smp_rmb();
2039 job = channel_gk20a_joblist_peek(c);
2040 channel_gk20a_joblist_unlock(c);
2041
2042 completed = gk20a_fence_is_expired(job->post_fence);
2043 if (!completed) {
2044 /*
2045 * The watchdog eventually sees an updated gp_get if
2046 * anything happened in this loop. A new job may have
2047 * been submitted between the timeout_stop call above
2048 * and this point; in that case, continuing here is a
2049 * no-op and the newer timeout from that submit is used.
2050 */
2051 if (clean_all && watchdog_on)
2052 gk20a_channel_timeout_continue(c);
2053 break;
2054 }
2055
2056 WARN_ON(!c->sync);
2057
2058 if (c->sync) {
2059 c->sync->signal_timeline(c->sync);
2060
2061 if (g->aggressive_sync_destroy_thresh) {
2062 nvgpu_mutex_acquire(&c->sync_lock);
2063 if (nvgpu_atomic_dec_and_test(
2064 &c->sync->refcount) &&
2065 g->aggressive_sync_destroy) {
2066 gk20a_channel_sync_destroy(c->sync);
2067 c->sync = NULL;
2068 }
2069 nvgpu_mutex_release(&c->sync_lock);
2070 }
2071 }
2072
2073 if (job->num_mapped_buffers)
2074 nvgpu_vm_put_buffers(vm, job->mapped_buffers,
2075 job->num_mapped_buffers);
2076
2077 /* Remove job from channel's job list before we close the
2078 * fences, to prevent other callers (gk20a_channel_abort) from
2079 * trying to dereference post_fence when it no longer exists.
2080 */
2081 channel_gk20a_joblist_lock(c);
2082 channel_gk20a_joblist_delete(c, job);
2083 channel_gk20a_joblist_unlock(c);
2084
2085 /* Close the fences (this will unref the semaphores and release
2086 * them to the pool). */
2087 gk20a_fence_put(job->pre_fence);
2088 gk20a_fence_put(job->post_fence);
2089
2090 /* Free the private command buffers (wait_cmd first and
2091 * then incr_cmd, i.e. in order of allocation). */
2092 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
2093 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
2094
2095 /* Drop the extra bookkeeping ref taken in gk20a_channel_add_job();
2096 * the caller must hold its own ref, so the channel isn't freed here. */
2097 gk20a_channel_put(c);
2098
2099 /*
2100 * Ensure all pending writes complete before freeing up the job.
2101 * See the corresponding nvgpu_smp_rmb() in channel_gk20a_alloc_job().
2102 */
2103 nvgpu_smp_wmb();
2104
2105 channel_gk20a_free_job(c, job);
2106 job_finished = 1;
2107
2108 /*
2109 * Deterministic channels have a channel-wide power reference;
2110 * for others, there's one per submit.
2111 */
2112 if (!c->deterministic)
2113 gk20a_idle(g);
2114
2115 if (!clean_all) {
2116 /* Timeout isn't supported here so don't touch it. */
2117 break;
2118 }
2119 }
2120
2121 nvgpu_mutex_release(&c->joblist.cleanup_lock);
2122
2123 if (job_finished && c->update_fn)
2124 schedule_work(&c->update_fn_work);
2125
2126 gk20a_channel_put(c);
2127}
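/*
 * Per-job teardown order used above: release the job's mapped buffers, unlink
 * the job from the joblist (so abort paths can no longer see its fences), put
 * the pre/post fences, free the wait/incr private command buffers in
 * allocation order, drop the bookkeeping channel ref from add_job, and only
 * then free the job itself behind a write barrier.
 */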
2128
2129/**
2130 * Schedule a job cleanup work on this channel to free resources and to signal
2131 * about completion.
2132 *
2133 * Call this when there has been an interrupt about finished jobs, or when job
2134 * cleanup needs to be performed, e.g., when closing a channel. This is always
2135 * safe to call even if there is nothing to clean up. Any visible actions on
2136 * jobs just before calling this are guaranteed to be processed.
2137 */
2138void gk20a_channel_update(struct channel_gk20a *c)
2139{
2140 if (!c->g->power_on) { /* shutdown case */
2141 return;
2142 }
2143
2144 trace_gk20a_channel_update(c->chid);
2145 /* A queued channel is always checked for job cleanup. */
2146 gk20a_channel_worker_enqueue(c);
2147}
2148
2149/*
2150 * Stop deterministic channel activity for do_idle() when power needs to go off
2151 * momentarily but deterministic channels keep power refs for potentially a
2152 * long time.
2153 *
2154 * Takes write access on g->deterministic_busy.
2155 *
2156 * Must be paired with gk20a_channel_deterministic_unidle().
2157 */
2158void gk20a_channel_deterministic_idle(struct gk20a *g)
2159{
2160 struct fifo_gk20a *f = &g->fifo;
2161 u32 chid;
2162
2163 /* Grab exclusive access to the hw to block new submits */
2164 nvgpu_rwsem_down_write(&g->deterministic_busy);
2165
2166 for (chid = 0; chid < f->num_channels; chid++) {
2167 struct channel_gk20a *ch = &f->channel[chid];
2168
2169 if (!gk20a_channel_get(ch))
2170 continue;
2171
2172 if (ch->deterministic) {
2173 /*
2174 * Drop the power ref taken when the deterministic
2175 * flag was set. deterministic_unidle will restore
2176 * the power ref and put this channel ref back.
2177 *
2178 * Keep holding the channel ref: the channel must not
2179 * get freed in between. A race could otherwise cause
2180 * a lost gk20a_busy() via unidle, and an unbalanced
2181 * gk20a_idle() via closing the channel.
2182 */
2183 gk20a_idle(g);
2184 } else {
2185 /* Not interesting, carry on. */
2186 gk20a_channel_put(ch);
2187 }
2188 }
2189}
2190
2191/*
2192 * Allow deterministic channel activity again for do_unidle().
2193 *
2194 * This releases write access on g->deterministic_busy.
2195 */
2196void gk20a_channel_deterministic_unidle(struct gk20a *g)
2197{
2198 struct fifo_gk20a *f = &g->fifo;
2199 u32 chid;
2200
2201 for (chid = 0; chid < f->num_channels; chid++) {
2202 struct channel_gk20a *ch = &f->channel[chid];
2203
2204 if (!gk20a_channel_get(ch))
2205 continue;
2206
2207 /*
2208 * Deterministic state changes inside deterministic_busy lock,
2209 * which we took in deterministic_idle.
2210 */
2211 if (ch->deterministic) {
2212 if (gk20a_busy(g))
2213 nvgpu_err(g, "cannot busy() again!");
2214 /* Took this in idle() */
2215 gk20a_channel_put(ch);
2216 }
2217
2218 gk20a_channel_put(ch);
2219 }
2220
2221 /* Release submits, new deterministic channels and frees */
2222 nvgpu_rwsem_up_write(&g->deterministic_busy);
2223}
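/*
 * Pairing (sketch): the idle/unidle pair brackets a window where power may go
 * off, e.g. in a hypothetical do_idle() sequence:
 *
 *	gk20a_channel_deterministic_idle(g);
 *	... rail-gate / power off and back on ...
 *	gk20a_channel_deterministic_unidle(g);
 */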
2224
2225int gk20a_init_channel_support(struct gk20a *g, u32 chid)
2226{
2227 struct channel_gk20a *c = g->fifo.channel + chid;
2228 int err;
2229
2230 c->g = NULL;
2231 c->chid = chid;
2232 nvgpu_atomic_set(&c->bound, false);
2233 nvgpu_spinlock_init(&c->ref_obtain_lock);
2234 nvgpu_atomic_set(&c->ref_count, 0);
2235 c->referenceable = false;
2236 nvgpu_cond_init(&c->ref_count_dec_wq);
2237
2238#if GK20A_CHANNEL_REFCOUNT_TRACKING
2239 nvgpu_spinlock_init(&c->ref_actions_lock);
2240#endif
2241 nvgpu_spinlock_init(&c->joblist.dynamic.lock);
2242 nvgpu_raw_spinlock_init(&c->timeout.lock);
2243
2244 nvgpu_init_list_node(&c->joblist.dynamic.jobs);
2245 nvgpu_init_list_node(&c->dbg_s_list);
2246 nvgpu_init_list_node(&c->event_id_list);
2247 nvgpu_init_list_node(&c->worker_item);
2248
2249 err = nvgpu_mutex_init(&c->ioctl_lock);
2250 if (err)
2251 return err;
2252 err = nvgpu_mutex_init(&c->error_notifier_mutex);
2253 if (err)
2254 goto fail_1;
2255 err = nvgpu_mutex_init(&c->joblist.cleanup_lock);
2256 if (err)
2257 goto fail_2;
2258 err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
2259 if (err)
2260 goto fail_3;
2261 err = nvgpu_mutex_init(&c->sync_lock);
2262 if (err)
2263 goto fail_4;
2264#if defined(CONFIG_GK20A_CYCLE_STATS)
2265 err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
2266 if (err)
2267 goto fail_5;
2268 err = nvgpu_mutex_init(&c->cs_client_mutex);
2269 if (err)
2270 goto fail_6;
2271#endif
2272 err = nvgpu_mutex_init(&c->event_id_list_lock);
2273 if (err)
2274 goto fail_7;
2275 err = nvgpu_mutex_init(&c->dbg_s_lock);
2276 if (err)
2277 goto fail_8;
2278
2279 nvgpu_list_add(&c->free_chs, &g->fifo.free_chs);
2280
2281 return 0;
2282
2283fail_8:
2284 nvgpu_mutex_destroy(&c->event_id_list_lock);
2285fail_7:
2286#if defined(CONFIG_GK20A_CYCLE_STATS)
2287 nvgpu_mutex_destroy(&c->cs_client_mutex);
2288fail_6:
2289 nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
2290fail_5:
2291#endif
2292 nvgpu_mutex_destroy(&c->sync_lock);
2293fail_4:
2294 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
2295fail_3:
2296 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
2297fail_2:
2298 nvgpu_mutex_destroy(&c->error_notifier_mutex);
2299fail_1:
2300 nvgpu_mutex_destroy(&c->ioctl_lock);
2301
2302 return err;
2303}
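/*
 * The error handling above is a standard unwind ladder: every mutex that was
 * initialized successfully gets destroyed in reverse order when a later init
 * fails. Minimal sketch of the pattern (a and b are illustrative locks):
 *
 *	err = nvgpu_mutex_init(&a);
 *	if (err)
 *		return err;
 *	err = nvgpu_mutex_init(&b);
 *	if (err)
 *		goto fail_a;
 *	return 0;
 *
 * fail_a:
 *	nvgpu_mutex_destroy(&a);
 *	return err;
 */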
2304
2305/* In this context the "channel" is the host1x channel, which
2306 * maps to *all* gk20a channels. */
2307int gk20a_channel_suspend(struct gk20a *g)
2308{
2309 struct fifo_gk20a *f = &g->fifo;
2310 u32 chid;
2311 bool channels_in_use = false;
2312 u32 active_runlist_ids = 0;
2313
2314 gk20a_dbg_fn("");
2315
2316 for (chid = 0; chid < f->num_channels; chid++) {
2317 struct channel_gk20a *ch = &f->channel[chid];
2318 if (gk20a_channel_get(ch)) {
2319 gk20a_dbg_info("suspend channel %d", chid);
2320 /* disable channel */
2321 gk20a_disable_channel_tsg(g, ch);
2322 /* preempt the channel */
2323 gk20a_fifo_preempt(g, ch);
2324 /* wait for channel update notifiers */
2325 if (ch->update_fn)
2326 cancel_work_sync(&ch->update_fn_work);
2327
2328 channels_in_use = true;
2329
2330 active_runlist_ids |= BIT(ch->runlist_id);
2331
2332 gk20a_channel_put(ch);
2333 }
2334 }
2335
2336 if (channels_in_use) {
2337 gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true);
2338
2339 for (chid = 0; chid < f->num_channels; chid++) {
2340 if (gk20a_channel_get(&f->channel[chid])) {
2341 g->ops.fifo.unbind_channel(&f->channel[chid]);
2342 gk20a_channel_put(&f->channel[chid]);
2343 }
2344 }
2345 }
2346
2347 gk20a_dbg_fn("done");
2348 return 0;
2349}
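/*
 * Runlist handling in suspend/resume (sketch): each in-use channel
 * contributes its runlist via BIT(ch->runlist_id), and the accumulated mask
 * is applied once with gk20a_fifo_update_runlist_ids(). For example, channels
 * sitting on runlists 0 and 2 produce a mask of 0x5, so only those two
 * runlists are rebuilt.
 */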
2350
2351int gk20a_channel_resume(struct gk20a *g)
2352{
2353 struct fifo_gk20a *f = &g->fifo;
2354 u32 chid;
2355 bool channels_in_use = false;
2356 u32 active_runlist_ids = 0;
2357
2358 gk20a_dbg_fn("");
2359
2360 for (chid = 0; chid < f->num_channels; chid++) {
2361 if (gk20a_channel_get(&f->channel[chid])) {
2362 gk20a_dbg_info("resume channel %d", chid);
2363 g->ops.fifo.bind_channel(&f->channel[chid]);
2364 channels_in_use = true;
2365 active_runlist_ids |= BIT(f->channel[chid].runlist_id);
2366 gk20a_channel_put(&f->channel[chid]);
2367 }
2368 }
2369
2370 if (channels_in_use)
2371 gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, true, true);
2372
2373 gk20a_dbg_fn("done");
2374 return 0;
2375}
2376
2377void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
2378{
2379 struct fifo_gk20a *f = &g->fifo;
2380 u32 chid;
2381
2382 gk20a_dbg_fn("");
2383
2384 /*
2385 * Ensure that all pending writes are actually done before trying to
2386 * read semaphore values from DRAM.
2387 */
2388 g->ops.mm.fb_flush(g);
2389
2390 for (chid = 0; chid < f->num_channels; chid++) {
2391 struct channel_gk20a *c = g->fifo.channel + chid;
2392 if (gk20a_channel_get(c)) {
2393 if (nvgpu_atomic_read(&c->bound)) {
2394 nvgpu_cond_broadcast_interruptible(
2395 &c->semaphore_wq);
2396 if (post_events) {
2397 if (gk20a_is_channel_marked_as_tsg(c)) {
2398 struct tsg_gk20a *tsg =
2399 &g->fifo.tsg[c->tsgid];
2400
2401 gk20a_tsg_event_id_post_event(tsg,
2402 NVGPU_EVENT_ID_BLOCKING_SYNC);
2403 } else {
2404 gk20a_channel_event_id_post_event(c,
2405 NVGPU_EVENT_ID_BLOCKING_SYNC);
2406 }
2407 }
2408 /*
2409 * Only non-deterministic channels get the
2410 * channel_update callback. Semaphore-backed
2411 * syncs aren't allowed for deterministic
2412 * channels anyway, since those syncs depend
2413 * on the sync framework.
2414 *
2415 * If a deterministic channel receives a
2416 * semaphore wakeup, it must be for a
2417 * user-space managed semaphore.
2418 */
2419 if (!c->deterministic)
2420 gk20a_channel_update(c);
2421 }
2422 gk20a_channel_put(c);
2423 }
2424 }
2425}