path: root/drivers/gpu/nvgpu/common/fifo
author	Konsta Holtta <kholtta@nvidia.com>	2018-08-21 05:27:07 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-08-24 17:57:38 -0400
commit	0c387d76dcc7e665255200ba8d98b9abb11cb4a1 (patch)
tree	a41f3dc117a8b4981ba0dc0e89efc9818d54ff09 /drivers/gpu/nvgpu/common/fifo
parent	f062cc5b24554f6ae67abbe846e6d6e8c15c4ffc (diff)
gpu: nvgpu: move channel code to common
Do a simple rename of channel_gk20a.c to common/fifo/channel.c. Header
cleanup and the like will soon follow. Also rename the os-specific files to
have unique names across directories, because tmake requires that.

Jira NVGPU-967

Change-Id: I302bbbbe29735264e832378d444a176a4023e3e1
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1804608
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/fifo')
-rw-r--r--	drivers/gpu/nvgpu/common/fifo/channel.c	2262
1 file changed, 2262 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
new file mode 100644
index 00000000..5966e191
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -0,0 +1,2262 @@
1/*
2 * GK20A Graphics channel
3 *
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <trace/events/gk20a.h>
26
27#include <nvgpu/semaphore.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/dma.h>
31#include <nvgpu/log.h>
32#include <nvgpu/atomic.h>
33#include <nvgpu/bug.h>
34#include <nvgpu/list.h>
35#include <nvgpu/circ_buf.h>
36#include <nvgpu/cond.h>
37#include <nvgpu/enabled.h>
38#include <nvgpu/debug.h>
39#include <nvgpu/ltc.h>
40#include <nvgpu/barrier.h>
41#include <nvgpu/ctxsw_trace.h>
42#include <nvgpu/error_notifier.h>
43#include <nvgpu/os_sched.h>
44#include <nvgpu/log2.h>
45#include <nvgpu/ptimer.h>
46
47#include "gk20a/gk20a.h"
48#include "gk20a/dbg_gpu_gk20a.h"
49#include "gk20a/fence_gk20a.h"
50
51static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
52static void gk20a_channel_dump_ref_actions(struct channel_gk20a *c);
53
54static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
55static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
56
57static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c);
58
59static void channel_gk20a_joblist_add(struct channel_gk20a *c,
60 struct channel_gk20a_job *job);
61static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
62 struct channel_gk20a_job *job);
63static struct channel_gk20a_job *channel_gk20a_joblist_peek(
64 struct channel_gk20a *c);
65
66/* allocate GPU channel */
67static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
68{
69 struct channel_gk20a *ch = NULL;
70 struct gk20a *g = f->g;
71
72 nvgpu_mutex_acquire(&f->free_chs_mutex);
73 if (!nvgpu_list_empty(&f->free_chs)) {
74 ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a,
75 free_chs);
76 nvgpu_list_del(&ch->free_chs);
77 WARN_ON(nvgpu_atomic_read(&ch->ref_count));
78 WARN_ON(ch->referenceable);
79 f->used_channels++;
80 }
81 nvgpu_mutex_release(&f->free_chs_mutex);
82
83 if (g->aggressive_sync_destroy_thresh &&
84 (f->used_channels >
85 g->aggressive_sync_destroy_thresh))
86 g->aggressive_sync_destroy = true;
87
88 return ch;
89}
90
91static void free_channel(struct fifo_gk20a *f,
92 struct channel_gk20a *ch)
93{
94 struct gk20a *g = f->g;
95
96 trace_gk20a_release_used_channel(ch->chid);
97 /* refcount is zero here and channel is in a freed/dead state */
98 nvgpu_mutex_acquire(&f->free_chs_mutex);
99 /* add to head to increase visibility of timing-related bugs */
100 nvgpu_list_add(&ch->free_chs, &f->free_chs);
101 f->used_channels--;
102 nvgpu_mutex_release(&f->free_chs_mutex);
103
104 /*
105 * On teardown it is not possible to dereference platform, but ignoring
 106 * this is fine because no new channels will be created.
107 */
108 if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
109 if (g->aggressive_sync_destroy_thresh &&
110 (f->used_channels <
111 g->aggressive_sync_destroy_thresh))
112 g->aggressive_sync_destroy = false;
113 }
114}
115
116int channel_gk20a_commit_va(struct channel_gk20a *c)
117{
118 struct gk20a *g = c->g;
119
120 nvgpu_log_fn(g, " ");
121
122 g->ops.mm.init_inst_block(&c->inst_block, c->vm,
123 c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]);
124
125 return 0;
126}
127
128int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
129 unsigned int timeslice_period,
130 unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale)
131{
132 unsigned int value = scale_ptimer(timeslice_period,
133 ptimer_scalingfactor10x(g->ptimer_src_freq));
134 unsigned int shift = 0;
135
136 /* value field is 8 bits long */
137 while (value >= 1 << 8) {
138 value >>= 1;
139 shift++;
140 }
141
 142 /* time slice register is only 18 bits long */
143 if ((value << shift) >= 1<<19) {
144 nvgpu_err(g, "Requested timeslice value is clamped to 18 bits\n");
145 value = 255;
146 shift = 10;
147 }
148
149 *__timeslice_timeout = value;
150 *__timeslice_scale = shift;
151
152 return 0;
153}
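/*
 * Illustrative note (editorial, not part of the original file): the loop
 * above encodes the scaled timeslice as an 8-bit mantissa plus a shift
 * exponent. For example, if scale_ptimer() yielded a value of 3000 (the
 * exact conversion depends on ptimer_src_freq and is not shown here), the
 * loop would halve it four times: 3000 -> 1500 -> 750 -> 375 -> 187, giving
 * *__timeslice_timeout = 187 and *__timeslice_scale = 4, so the hardware
 * sees roughly 187 << 4 = 2992 ticks.
 */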
154
155int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
156{
157 return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->chid, add, true);
158}
159
160int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
161{
162 struct tsg_gk20a *tsg;
163
164 if (gk20a_is_channel_marked_as_tsg(ch)) {
165 tsg = &g->fifo.tsg[ch->tsgid];
166 g->ops.fifo.enable_tsg(tsg);
167 } else {
168 g->ops.fifo.enable_channel(ch);
169 }
170
171 return 0;
172}
173
174int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
175{
176 struct tsg_gk20a *tsg;
177
178 if (gk20a_is_channel_marked_as_tsg(ch)) {
179 tsg = &g->fifo.tsg[ch->tsgid];
180 g->ops.fifo.disable_tsg(tsg);
181 } else {
182 g->ops.fifo.disable_channel(ch);
183 }
184
185 return 0;
186}
187
188void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
189{
190 /* synchronize with actual job cleanup */
191 nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
192
193 /* ensure no fences are pending */
194 nvgpu_mutex_acquire(&ch->sync_lock);
195 if (ch->sync)
196 ch->sync->set_min_eq_max(ch->sync);
197 if (ch->user_sync)
198 ch->user_sync->set_safe_state(ch->user_sync);
199 nvgpu_mutex_release(&ch->sync_lock);
200
201 nvgpu_mutex_release(&ch->joblist.cleanup_lock);
202
203 /*
204 * When closing the channel, this scheduled update holds one ref which
205 * is waited for before advancing with freeing.
206 */
207 gk20a_channel_update(ch);
208}
209
210void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
211{
212 nvgpu_log_fn(ch->g, " ");
213
214 if (gk20a_is_channel_marked_as_tsg(ch))
215 return gk20a_fifo_abort_tsg(ch->g, ch->tsgid, channel_preempt);
216
217 /* make sure new kickoffs are prevented */
218 ch->has_timedout = true;
219
220 ch->g->ops.fifo.disable_channel(ch);
221
222 if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch))
223 ch->g->ops.fifo.preempt_channel(ch->g, ch->chid);
224
225 if (ch->g->ops.fifo.ch_abort_clean_up)
226 ch->g->ops.fifo.ch_abort_clean_up(ch);
227}
228
229int gk20a_wait_channel_idle(struct channel_gk20a *ch)
230{
231 bool channel_idle = false;
232 struct nvgpu_timeout timeout;
233
234 nvgpu_timeout_init(ch->g, &timeout, gk20a_get_gr_idle_timeout(ch->g),
235 NVGPU_TIMER_CPU_TIMER);
236
237 do {
238 channel_gk20a_joblist_lock(ch);
239 channel_idle = channel_gk20a_joblist_is_empty(ch);
240 channel_gk20a_joblist_unlock(ch);
241 if (channel_idle)
242 break;
243
244 nvgpu_usleep_range(1000, 3000);
245 } while (!nvgpu_timeout_expired(&timeout));
246
247 if (!channel_idle) {
248 nvgpu_err(ch->g, "jobs not freed for channel %d",
249 ch->chid);
250 return -EBUSY;
251 }
252
253 return 0;
254}
255
256void gk20a_disable_channel(struct channel_gk20a *ch)
257{
258 gk20a_channel_abort(ch, true);
259 channel_gk20a_update_runlist(ch, false);
260}
261
262void gk20a_wait_until_counter_is_N(
263 struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
264 struct nvgpu_cond *c, const char *caller, const char *counter_name)
265{
266 while (true) {
267 if (NVGPU_COND_WAIT(
268 c,
269 nvgpu_atomic_read(counter) == wait_value,
270 5000) == 0)
271 break;
272
273 nvgpu_warn(ch->g,
274 "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
275 caller, ch->chid, counter_name,
276 nvgpu_atomic_read(counter), wait_value);
277
278 gk20a_channel_dump_ref_actions(ch);
279 }
280}
281
282/* call ONLY when no references to the channel exist: after the last put */
283static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
284{
285 struct gk20a *g = ch->g;
286 struct fifo_gk20a *f = &g->fifo;
287 struct gr_gk20a *gr = &g->gr;
288 struct vm_gk20a *ch_vm = ch->vm;
289 unsigned long timeout = gk20a_get_gr_idle_timeout(g);
290 struct dbg_session_gk20a *dbg_s;
291 struct dbg_session_data *session_data, *tmp_s;
292 struct dbg_session_channel_data *ch_data, *tmp;
293 int err;
294
295 nvgpu_log_fn(g, " ");
296
297 WARN_ON(ch->g == NULL);
298
299 trace_gk20a_free_channel(ch->chid);
300
301 if (g->os_channel.close)
302 g->os_channel.close(ch);
303
304 /*
305 * Disable channel/TSG and unbind here. This should not be executed if
306 * HW access is not available during shutdown/removal path as it will
307 * trigger a timeout
308 */
309 if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
310 /* abort channel and remove from runlist */
311 if (gk20a_is_channel_marked_as_tsg(ch)) {
312 err = gk20a_tsg_unbind_channel(ch);
313 if (err)
314 nvgpu_err(g,
315 "failed to unbind channel %d from TSG",
316 ch->chid);
317 } else {
318 /*
319 * Channel is already unbound from TSG by User with
320 * explicit call
321 * Nothing to do here in that case
322 */
323 }
324 }
325 /* wait until there's only our ref to the channel */
326 if (!force)
327 gk20a_wait_until_counter_is_N(
328 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
329 __func__, "references");
330
331 /* wait until all pending interrupts for recently completed
332 * jobs are handled */
333 nvgpu_wait_for_deferred_interrupts(g);
334
335 /* prevent new refs */
336 nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
337 if (!ch->referenceable) {
338 nvgpu_spinlock_release(&ch->ref_obtain_lock);
339 nvgpu_err(ch->g,
340 "Extra %s() called to channel %u",
341 __func__, ch->chid);
342 return;
343 }
344 ch->referenceable = false;
345 nvgpu_spinlock_release(&ch->ref_obtain_lock);
346
347 /* matches with the initial reference in gk20a_open_new_channel() */
348 nvgpu_atomic_dec(&ch->ref_count);
349
350 /* wait until no more refs to the channel */
351 if (!force)
352 gk20a_wait_until_counter_is_N(
353 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
354 __func__, "references");
355
356 /* if engine reset was deferred, perform it now */
357 nvgpu_mutex_acquire(&f->deferred_reset_mutex);
358 if (g->fifo.deferred_reset_pending) {
359 nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
360 " deferred, running now");
361 /* if lock is already taken, a reset is taking place
362 so no need to repeat */
363 if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) {
364 gk20a_fifo_deferred_reset(g, ch);
365 nvgpu_mutex_release(&g->fifo.gr_reset_mutex);
366 }
367 }
368 nvgpu_mutex_release(&f->deferred_reset_mutex);
369
370 if (!gk20a_channel_as_bound(ch))
371 goto unbind;
372
373 nvgpu_log_info(g, "freeing bound channel context, timeout=%ld",
374 timeout);
375
376#ifdef CONFIG_GK20A_CTXSW_TRACE
377 if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
378 g->ops.fecs_trace.unbind_channel(g, ch);
379#endif
380
 381 if (g->ops.fifo.free_channel_ctx_header)
382 g->ops.fifo.free_channel_ctx_header(ch);
383
384 if (ch->usermode_submit_enabled) {
385 gk20a_channel_free_usermode_buffers(ch);
386 ch->userd_iova = nvgpu_mem_get_addr(g, &f->userd) +
387 ch->chid * f->userd_entry_size;
388 ch->usermode_submit_enabled = false;
389 }
390
391 gk20a_gr_flush_channel_tlb(gr);
392
393 nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem);
394 nvgpu_big_free(g, ch->gpfifo.pipe);
395 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
396
397 channel_gk20a_free_priv_cmdbuf(ch);
398
399 /* sync must be destroyed before releasing channel vm */
400 nvgpu_mutex_acquire(&ch->sync_lock);
401 if (ch->sync) {
402 gk20a_channel_sync_destroy(ch->sync, false);
403 ch->sync = NULL;
404 }
405 if (ch->user_sync) {
406 /*
 407 * Set the user managed syncpoint to a safe state,
 408 * but that's already done if the channel has timed out.
409 */
410 if (ch->has_timedout)
411 gk20a_channel_sync_destroy(ch->user_sync, false);
412 else
413 gk20a_channel_sync_destroy(ch->user_sync, true);
414 ch->user_sync = NULL;
415 }
416 nvgpu_mutex_release(&ch->sync_lock);
417
418 /*
419 * free the channel used semaphore index.
420 * we need to do this before releasing the address space,
421 * as the semaphore pool might get freed after that point.
422 */
423 if (ch->hw_sema)
424 nvgpu_semaphore_free_hw_sema(ch);
425
426 /*
427 * When releasing the channel we unbind the VM - so release the ref.
428 */
429 nvgpu_vm_put(ch_vm);
430
431 /* make sure we don't have deferred interrupts pending that
432 * could still touch the channel */
433 nvgpu_wait_for_deferred_interrupts(g);
434
435unbind:
436 g->ops.fifo.unbind_channel(ch);
437 g->ops.fifo.free_inst(g, ch);
438
439 /* put back the channel-wide submit ref from init */
440 if (ch->deterministic) {
441 nvgpu_rwsem_down_read(&g->deterministic_busy);
442 ch->deterministic = false;
443 if (!ch->deterministic_railgate_allowed)
444 gk20a_idle(g);
445 ch->deterministic_railgate_allowed = false;
446
447 nvgpu_rwsem_up_read(&g->deterministic_busy);
448 }
449
450 ch->vpr = false;
451 ch->vm = NULL;
452
453 WARN_ON(ch->sync);
454
455 /* unlink all debug sessions */
456 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
457
458 nvgpu_list_for_each_entry_safe(session_data, tmp_s,
459 &ch->dbg_s_list, dbg_session_data, dbg_s_entry) {
460 dbg_s = session_data->dbg_s;
461 nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
462 nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list,
463 dbg_session_channel_data, ch_entry) {
464 if (ch_data->chid == ch->chid)
465 ch_data->unbind_single_channel(dbg_s, ch_data);
466 }
467 nvgpu_mutex_release(&dbg_s->ch_list_lock);
468 }
469
470 nvgpu_mutex_release(&g->dbg_sessions_lock);
471
472 /* free pre-allocated resources, if applicable */
473 if (channel_gk20a_is_prealloc_enabled(ch))
474 channel_gk20a_free_prealloc_resources(ch);
475
476#if GK20A_CHANNEL_REFCOUNT_TRACKING
477 memset(ch->ref_actions, 0, sizeof(ch->ref_actions));
478 ch->ref_actions_put = 0;
479#endif
480
481 /* make sure we catch accesses of unopened channels in case
 482 * there are non-refcounted channel pointers hanging around */
483 ch->g = NULL;
484 nvgpu_smp_wmb();
485
486 /* ALWAYS last */
487 free_channel(f, ch);
488}
489
490static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch)
491{
492#if GK20A_CHANNEL_REFCOUNT_TRACKING
493 size_t i, get;
494 s64 now = nvgpu_current_time_ms();
495 s64 prev = 0;
496 struct gk20a *g = ch->g;
497
498 nvgpu_spinlock_acquire(&ch->ref_actions_lock);
499
500 nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:",
501 ch->chid, nvgpu_atomic_read(&ch->ref_count));
502
503 /* start at the oldest possible entry. put is next insertion point */
504 get = ch->ref_actions_put;
505
506 /*
507 * If the buffer is not full, this will first loop to the oldest entry,
508 * skipping not-yet-initialized entries. There is no ref_actions_get.
509 */
510 for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) {
511 struct channel_gk20a_ref_action *act = &ch->ref_actions[get];
512
513 if (act->trace.nr_entries) {
514 nvgpu_info(g,
515 "%s ref %zu steps ago (age %lld ms, diff %lld ms)",
516 act->type == channel_gk20a_ref_action_get
517 ? "GET" : "PUT",
518 GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i,
519 now - act->timestamp_ms,
520 act->timestamp_ms - prev);
521
522 print_stack_trace(&act->trace, 0);
523 prev = act->timestamp_ms;
524 }
525
526 get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING;
527 }
528
529 nvgpu_spinlock_release(&ch->ref_actions_lock);
530#endif
531}
532
533static void gk20a_channel_save_ref_source(struct channel_gk20a *ch,
534 enum channel_gk20a_ref_action_type type)
535{
536#if GK20A_CHANNEL_REFCOUNT_TRACKING
537 struct channel_gk20a_ref_action *act;
538
539 nvgpu_spinlock_acquire(&ch->ref_actions_lock);
540
541 act = &ch->ref_actions[ch->ref_actions_put];
542 act->type = type;
543 act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN;
544 act->trace.nr_entries = 0;
545 act->trace.skip = 3; /* onwards from the caller of this */
546 act->trace.entries = act->trace_entries;
547 save_stack_trace(&act->trace);
548 act->timestamp_ms = nvgpu_current_time_ms();
549 ch->ref_actions_put = (ch->ref_actions_put + 1) %
550 GK20A_CHANNEL_REFCOUNT_TRACKING;
551
552 nvgpu_spinlock_release(&ch->ref_actions_lock);
553#endif
554}
555
 556/* Try to get a reference to the channel. Return non-NULL on success. If this
 557 * fails, the channel is dead or being freed elsewhere and you must not touch it.
 558 *
 559 * Whenever a channel_gk20a pointer is seen and about to be used, a
560 * reference must be held to it - either by you or the caller, which should be
561 * documented well or otherwise clearly seen. This usually boils down to the
562 * file from ioctls directly, or an explicit get in exception handlers when the
563 * channel is found by a chid.
564 *
565 * Most global functions in this file require a reference to be held by the
566 * caller.
567 */
568struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
569 const char *caller) {
570 struct channel_gk20a *ret;
571
572 nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
573
574 if (likely(ch->referenceable)) {
575 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get);
576 nvgpu_atomic_inc(&ch->ref_count);
577 ret = ch;
578 } else
579 ret = NULL;
580
581 nvgpu_spinlock_release(&ch->ref_obtain_lock);
582
583 if (ret)
584 trace_gk20a_channel_get(ch->chid, caller);
585
586 return ret;
587}
588
589void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
590{
591 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put);
592 trace_gk20a_channel_put(ch->chid, caller);
593 nvgpu_atomic_dec(&ch->ref_count);
594 nvgpu_cond_broadcast(&ch->ref_count_dec_wq);
595
596 /* More puts than gets. Channel is probably going to get
597 * stuck. */
598 WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0);
599
600 /* Also, more puts than gets. ref_count can go to 0 only if
601 * the channel is closing. Channel is probably going to get
602 * stuck. */
603 WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable);
604}
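/*
 * Usage sketch (editorial illustration, not part of the original commit),
 * assuming the usual gk20a_channel_get()/gk20a_channel_put() wrappers that
 * pass __func__ as the caller string to the functions above:
 *
 *	struct channel_gk20a *ch = gk20a_channel_get(&f->channel[chid]);
 *
 *	if (ch != NULL) {
 *		... use ch; it cannot be freed while the reference is held ...
 *		gk20a_channel_put(ch);
 *	}
 *
 * This is the same pattern the watchdog poller later in this file uses when
 * it walks the channel array.
 */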
605
606void gk20a_channel_close(struct channel_gk20a *ch)
607{
608 gk20a_free_channel(ch, false);
609}
610
611/*
612 * Be careful with this - it is meant for terminating channels when we know the
613 * driver is otherwise dying. Ref counts and the like are ignored by this
614 * version of the cleanup.
615 */
616void __gk20a_channel_kill(struct channel_gk20a *ch)
617{
618 gk20a_free_channel(ch, true);
619}
620
621struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
622 s32 runlist_id,
623 bool is_privileged_channel,
624 pid_t pid, pid_t tid)
625{
626 struct fifo_gk20a *f = &g->fifo;
627 struct channel_gk20a *ch;
628
629 /* compatibility with existing code */
630 if (!gk20a_fifo_is_valid_runlist_id(g, runlist_id)) {
631 runlist_id = gk20a_fifo_get_gr_runlist_id(g);
632 }
633
634 nvgpu_log_fn(g, " ");
635
636 ch = allocate_channel(f);
637 if (ch == NULL) {
638 /* TBD: we want to make this virtualizable */
639 nvgpu_err(g, "out of hw chids");
640 return NULL;
641 }
642
643 trace_gk20a_open_new_channel(ch->chid);
644
645 BUG_ON(ch->g);
646 ch->g = g;
647
648 /* Runlist for the channel */
649 ch->runlist_id = runlist_id;
650
651 /* Channel privilege level */
652 ch->is_privileged_channel = is_privileged_channel;
653
654 ch->pid = tid;
655 ch->tgid = pid; /* process granularity for FECS traces */
656
657 if (g->ops.fifo.alloc_inst(g, ch)) {
658 ch->g = NULL;
659 free_channel(f, ch);
660 nvgpu_err(g,
661 "failed to open gk20a channel, out of inst mem");
662 return NULL;
663 }
664
665 /* now the channel is in a limbo out of the free list but not marked as
666 * alive and used (i.e. get-able) yet */
667
668 /* By default, channel is regular (non-TSG) channel */
669 ch->tsgid = NVGPU_INVALID_TSG_ID;
670
671 /* clear ctxsw timeout counter and update timestamp */
672 ch->timeout_accumulated_ms = 0;
673 ch->timeout_gpfifo_get = 0;
674 /* set gr host default timeout */
675 ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
676 ch->timeout_debug_dump = true;
677 ch->has_timedout = false;
678
679 /* init kernel watchdog timeout */
680 ch->timeout.enabled = true;
681 ch->timeout.limit_ms = g->ch_wdt_timeout_ms;
682 ch->timeout.debug_dump = true;
683
684 ch->obj_class = 0;
685 ch->subctx_id = 0;
686 ch->runqueue_sel = 0;
687
688 ch->mmu_nack_handled = false;
689
690 /* The channel is *not* runnable at this point. It still needs to have
691 * an address space bound and allocate a gpfifo and grctx. */
692
693 nvgpu_cond_init(&ch->notifier_wq);
694 nvgpu_cond_init(&ch->semaphore_wq);
695
696 if (g->os_channel.open)
697 g->os_channel.open(ch);
698
699 /* Mark the channel alive, get-able, with 1 initial use
700 * references. The initial reference will be decreased in
701 * gk20a_free_channel() */
702 ch->referenceable = true;
703 nvgpu_atomic_set(&ch->ref_count, 1);
704 nvgpu_smp_wmb();
705
706 return ch;
707}
708
709/* allocate private cmd buffer.
710 used for inserting commands before/after user submitted buffers. */
711static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
712{
713 struct gk20a *g = c->g;
714 struct vm_gk20a *ch_vm = c->vm;
715 struct priv_cmd_queue *q = &c->priv_cmd_q;
716 u32 size;
717 int err = 0;
718
719 /*
720 * Compute the amount of priv_cmdbuf space we need. In general the worst
721 * case is the kernel inserts both a semaphore pre-fence and post-fence.
722 * Any sync-pt fences will take less memory so we can ignore them for
723 * now.
724 *
725 * A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b,
726 * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10
727 * dwords: all the same as an ACQ plus a non-stalling intr which is
728 * another 2 dwords.
729 *
730 * Lastly the number of gpfifo entries per channel is fixed so at most
731 * we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one
732 * userspace entry, and one post-fence entry). Thus the computation is:
733 *
 734 * (gpfifo entry number) * (2 / 3) * (8 + 10) * 4 bytes.
735 */
736 size = roundup_pow_of_two(c->gpfifo.entry_num *
737 2 * 18 * sizeof(u32) / 3);
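	/*
	 * Worked example (editorial, illustrative numbers only): with 1024
	 * gpfifo entries the expression above is 1024 * 2 * 18 * 4 / 3 =
	 * 49152 bytes, which roundup_pow_of_two() turns into a 64 KB queue.
	 */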
738
739 err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem);
740 if (err) {
741 nvgpu_err(g, "%s: memory allocation failed", __func__);
742 goto clean_up;
743 }
744
745 q->size = q->mem.size / sizeof (u32);
746
747 return 0;
748
749clean_up:
750 channel_gk20a_free_priv_cmdbuf(c);
751 return err;
752}
753
754static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
755{
756 struct vm_gk20a *ch_vm = c->vm;
757 struct priv_cmd_queue *q = &c->priv_cmd_q;
758
759 if (q->size == 0)
760 return;
761
762 nvgpu_dma_unmap_free(ch_vm, &q->mem);
763
764 memset(q, 0, sizeof(struct priv_cmd_queue));
765}
766
767/* allocate a cmd buffer with given size. size is number of u32 entries */
768int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
769 struct priv_cmd_entry *e)
770{
771 struct priv_cmd_queue *q = &c->priv_cmd_q;
772 u32 free_count;
773 u32 size = orig_size;
774
775 nvgpu_log_fn(c->g, "size %d", orig_size);
776
777 if (!e) {
778 nvgpu_err(c->g,
779 "ch %d: priv cmd entry is null",
780 c->chid);
781 return -EINVAL;
782 }
783
784 /* if free space in the end is less than requested, increase the size
 785 * to make the real allocated space start from the beginning. */
786 if (q->put + size > q->size)
787 size = orig_size + (q->size - q->put);
788
789 nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d",
790 c->chid, q->get, q->put);
791
792 free_count = (q->size - (q->put - q->get) - 1) % q->size;
793
794 if (size > free_count)
795 return -EAGAIN;
796
797 e->size = orig_size;
798 e->mem = &q->mem;
799
800 /* if we have increased size to skip free space in the end, set put
801 to beginning of cmd buffer (0) + size */
802 if (size != orig_size) {
803 e->off = 0;
804 e->gva = q->mem.gpu_va;
805 q->put = orig_size;
806 } else {
807 e->off = q->put;
808 e->gva = q->mem.gpu_va + q->put * sizeof(u32);
809 q->put = (q->put + orig_size) & (q->size - 1);
810 }
811
812 /* we already handled q->put + size > q->size so BUG_ON this */
813 BUG_ON(q->put > q->size);
814
815 /*
816 * commit the previous writes before making the entry valid.
817 * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf().
818 */
819 nvgpu_smp_wmb();
820
821 e->valid = true;
822 nvgpu_log_fn(c->g, "done");
823
824 return 0;
825}
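/*
 * Worked example of the wrap-around handling above (editorial, illustrative
 * numbers only): with q->size = 1024, q->put = 1020 and q->get = 100, a
 * request for orig_size = 16 words does not fit before the end of the
 * queue, so size becomes 16 + (1024 - 1020) = 20. free_count is
 * (1024 - (1020 - 100) - 1) % 1024 = 103 >= 20, so the entry is placed at
 * offset 0, q->put becomes 16, and the last 4 words before the wrap are
 * simply skipped.
 */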
826
 827/* Don't call this to free an explicit cmd entry.
828 * It doesn't update priv_cmd_queue get/put */
829void free_priv_cmdbuf(struct channel_gk20a *c,
830 struct priv_cmd_entry *e)
831{
832 if (channel_gk20a_is_prealloc_enabled(c))
833 memset(e, 0, sizeof(struct priv_cmd_entry));
834 else
835 nvgpu_kfree(c->g, e);
836}
837
838int channel_gk20a_alloc_job(struct channel_gk20a *c,
839 struct channel_gk20a_job **job_out)
840{
841 int err = 0;
842
843 if (channel_gk20a_is_prealloc_enabled(c)) {
844 int put = c->joblist.pre_alloc.put;
845 int get = c->joblist.pre_alloc.get;
846
847 /*
848 * ensure all subsequent reads happen after reading get.
849 * see corresponding nvgpu_smp_wmb in
850 * gk20a_channel_clean_up_jobs()
851 */
852 nvgpu_smp_rmb();
853
854 if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
855 *job_out = &c->joblist.pre_alloc.jobs[put];
856 else {
857 nvgpu_warn(c->g,
858 "out of job ringbuffer space");
859 err = -EAGAIN;
860 }
861 } else {
862 *job_out = nvgpu_kzalloc(c->g,
863 sizeof(struct channel_gk20a_job));
864 if (!*job_out)
865 err = -ENOMEM;
866 }
867
868 return err;
869}
870
871void channel_gk20a_free_job(struct channel_gk20a *c,
872 struct channel_gk20a_job *job)
873{
874 /*
875 * In case of pre_allocated jobs, we need to clean out
876 * the job but maintain the pointers to the priv_cmd_entry,
877 * since they're inherently tied to the job node.
878 */
879 if (channel_gk20a_is_prealloc_enabled(c)) {
880 struct priv_cmd_entry *wait_cmd = job->wait_cmd;
881 struct priv_cmd_entry *incr_cmd = job->incr_cmd;
882 memset(job, 0, sizeof(*job));
883 job->wait_cmd = wait_cmd;
884 job->incr_cmd = incr_cmd;
885 } else
886 nvgpu_kfree(c->g, job);
887}
888
889void channel_gk20a_joblist_lock(struct channel_gk20a *c)
890{
891 if (channel_gk20a_is_prealloc_enabled(c))
892 nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
893 else
894 nvgpu_spinlock_acquire(&c->joblist.dynamic.lock);
895}
896
897void channel_gk20a_joblist_unlock(struct channel_gk20a *c)
898{
899 if (channel_gk20a_is_prealloc_enabled(c))
900 nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
901 else
902 nvgpu_spinlock_release(&c->joblist.dynamic.lock);
903}
904
905static struct channel_gk20a_job *channel_gk20a_joblist_peek(
906 struct channel_gk20a *c)
907{
908 int get;
909 struct channel_gk20a_job *job = NULL;
910
911 if (channel_gk20a_is_prealloc_enabled(c)) {
912 if (!channel_gk20a_joblist_is_empty(c)) {
913 get = c->joblist.pre_alloc.get;
914 job = &c->joblist.pre_alloc.jobs[get];
915 }
916 } else {
917 if (!nvgpu_list_empty(&c->joblist.dynamic.jobs))
918 job = nvgpu_list_first_entry(&c->joblist.dynamic.jobs,
919 channel_gk20a_job, list);
920 }
921
922 return job;
923}
924
925static void channel_gk20a_joblist_add(struct channel_gk20a *c,
926 struct channel_gk20a_job *job)
927{
928 if (channel_gk20a_is_prealloc_enabled(c)) {
929 c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) %
930 (c->joblist.pre_alloc.length);
931 } else {
932 nvgpu_list_add_tail(&job->list, &c->joblist.dynamic.jobs);
933 }
934}
935
936static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
937 struct channel_gk20a_job *job)
938{
939 if (channel_gk20a_is_prealloc_enabled(c)) {
940 c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) %
941 (c->joblist.pre_alloc.length);
942 } else {
943 nvgpu_list_del(&job->list);
944 }
945}
946
947bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c)
948{
949 if (channel_gk20a_is_prealloc_enabled(c)) {
950 int get = c->joblist.pre_alloc.get;
951 int put = c->joblist.pre_alloc.put;
952 return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length));
953 }
954
955 return nvgpu_list_empty(&c->joblist.dynamic.jobs);
956}
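/*
 * Editorial note (illustrative numbers, assuming a power-of-two pre_alloc
 * length as the CIRC_* helpers expect): with length = 8, put = 5 and
 * get = 2, CIRC_CNT() reports 3 jobs in flight and CIRC_SPACE() reports 4
 * free slots; one slot always stays unused so that put == get can only
 * mean an empty joblist.
 */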
957
958bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
959{
960 bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
961
962 nvgpu_smp_rmb();
963 return pre_alloc_enabled;
964}
965
966static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
967 unsigned int num_jobs)
968{
969 unsigned int i;
970 int err;
971 size_t size;
972 struct priv_cmd_entry *entries = NULL;
973
974 if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs)
975 return -EINVAL;
976
977 /*
978 * pre-allocate the job list.
 979 * since vmalloc takes an unsigned long, we need
980 * to make sure we don't hit an overflow condition
981 */
982 size = sizeof(struct channel_gk20a_job);
983 if (num_jobs <= ULONG_MAX / size)
984 c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g,
985 num_jobs * size);
986 if (!c->joblist.pre_alloc.jobs) {
987 err = -ENOMEM;
988 goto clean_up;
989 }
990
991 /*
992 * pre-allocate 2x priv_cmd_entry for each job up front.
 993 * since vmalloc takes an unsigned long, we need
994 * to make sure we don't hit an overflow condition
995 */
996 size = sizeof(struct priv_cmd_entry);
997 if (num_jobs <= ULONG_MAX / (size << 1))
998 entries = nvgpu_vzalloc(c->g, (num_jobs << 1) * size);
999 if (!entries) {
1000 err = -ENOMEM;
1001 goto clean_up_joblist;
1002 }
1003
1004 for (i = 0; i < num_jobs; i++) {
1005 c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
1006 c->joblist.pre_alloc.jobs[i].incr_cmd =
1007 &entries[i + num_jobs];
1008 }
1009
1010 /* pre-allocate a fence pool */
1011 err = gk20a_alloc_fence_pool(c, num_jobs);
1012 if (err)
1013 goto clean_up_priv_cmd;
1014
1015 c->joblist.pre_alloc.length = num_jobs;
1016 c->joblist.pre_alloc.put = 0;
1017 c->joblist.pre_alloc.get = 0;
1018
1019 /*
1020 * commit the previous writes before setting the flag.
1021 * see corresponding nvgpu_smp_rmb in
1022 * channel_gk20a_is_prealloc_enabled()
1023 */
1024 nvgpu_smp_wmb();
1025 c->joblist.pre_alloc.enabled = true;
1026
1027 return 0;
1028
1029clean_up_priv_cmd:
1030 nvgpu_vfree(c->g, entries);
1031clean_up_joblist:
1032 nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
1033clean_up:
1034 memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
1035 return err;
1036}
1037
1038static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
1039{
1040 nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd);
1041 nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
1042 gk20a_free_fence_pool(c);
1043
1044 /*
1045 * commit the previous writes before disabling the flag.
1046 * see corresponding nvgpu_smp_rmb in
1047 * channel_gk20a_is_prealloc_enabled()
1048 */
1049 nvgpu_smp_wmb();
1050 c->joblist.pre_alloc.enabled = false;
1051}
1052
1053int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
1054 struct nvgpu_gpfifo_args *gpfifo_args)
1055{
1056 struct gk20a *g = c->g;
1057 struct vm_gk20a *ch_vm;
1058 u32 gpfifo_size, gpfifo_entry_size;
1059 int err = 0;
1060 unsigned long acquire_timeout;
1061
1062 gpfifo_size = gpfifo_args->num_entries;
1063 gpfifo_entry_size = nvgpu_get_gpfifo_entry_size();
1064
1065 if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_SUPPORT_VPR)
1066 c->vpr = true;
1067
1068 if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC) {
1069 nvgpu_rwsem_down_read(&g->deterministic_busy);
1070 /*
1071 * Railgating isn't deterministic; instead of disallowing
1072 * railgating globally, take a power refcount for this
1073 * channel's lifetime. The gk20a_idle() pair for this happens
1074 * when the channel gets freed.
1075 *
1076 * Deterministic flag and this busy must be atomic within the
1077 * busy lock.
1078 */
1079 err = gk20a_busy(g);
1080 if (err) {
1081 nvgpu_rwsem_up_read(&g->deterministic_busy);
1082 return err;
1083 }
1084
1085 c->deterministic = true;
1086 nvgpu_rwsem_up_read(&g->deterministic_busy);
1087 }
1088
1089 /* an address space needs to have been bound at this point. */
1090 if (!gk20a_channel_as_bound(c)) {
1091 nvgpu_err(g,
1092 "not bound to an address space at time of gpfifo"
1093 " allocation.");
1094 err = -EINVAL;
1095 goto clean_up_idle;
1096 }
1097 ch_vm = c->vm;
1098
1099 if (c->gpfifo.mem.size) {
1100 nvgpu_err(g, "channel %d :"
1101 "gpfifo already allocated", c->chid);
1102 err = -EEXIST;
1103 goto clean_up_idle;
1104 }
1105
1106 if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT) {
1107 if (g->ops.fifo.alloc_usermode_buffers) {
1108 err = g->ops.fifo.alloc_usermode_buffers(c,
1109 gpfifo_args);
1110 if (err) {
1111 nvgpu_err(g, "Usermode buffer alloc failed");
1112 goto clean_up;
1113 }
1114 c->userd_iova = nvgpu_mem_get_addr(g,
1115 &c->usermode_userd);
1116 c->usermode_submit_enabled = true;
1117 } else {
1118 nvgpu_err(g, "Usermode submit not supported");
1119 err = -EINVAL;
1120 goto clean_up;
1121 }
1122 }
1123
1124 err = nvgpu_dma_alloc_map_sys(ch_vm,
1125 gpfifo_size * gpfifo_entry_size,
1126 &c->gpfifo.mem);
1127 if (err) {
1128 nvgpu_err(g, "%s: memory allocation failed", __func__);
1129 goto clean_up_usermode;
1130 }
1131
1132 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM) {
1133 c->gpfifo.pipe = nvgpu_big_malloc(g,
1134 gpfifo_size * gpfifo_entry_size);
1135 if (!c->gpfifo.pipe) {
1136 err = -ENOMEM;
1137 goto clean_up_unmap;
1138 }
1139 }
1140
1141 c->gpfifo.entry_num = gpfifo_size;
1142 c->gpfifo.get = c->gpfifo.put = 0;
1143
1144 nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d",
1145 c->chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);
1146
1147 g->ops.fifo.setup_userd(c);
1148
1149 if (!g->aggressive_sync_destroy_thresh) {
1150 nvgpu_mutex_acquire(&c->sync_lock);
1151 c->sync = gk20a_channel_sync_create(c, false);
1152 if (!c->sync) {
1153 err = -ENOMEM;
1154 nvgpu_mutex_release(&c->sync_lock);
1155 goto clean_up_unmap;
1156 }
1157 nvgpu_mutex_release(&c->sync_lock);
1158
1159 if (g->ops.fifo.resetup_ramfc) {
1160 err = g->ops.fifo.resetup_ramfc(c);
1161 if (err)
1162 goto clean_up_sync;
1163 }
1164 }
1165
1166 if (!nvgpu_is_timeouts_enabled(c->g) || !c->timeout.enabled)
1167 acquire_timeout = 0;
1168 else
1169 acquire_timeout = c->timeout.limit_ms;
1170
1171 err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
1172 c->gpfifo.entry_num,
1173 acquire_timeout, gpfifo_args->flags);
1174 if (err)
1175 goto clean_up_sync;
1176
1177 /* TBD: setup engine contexts */
1178
1179 if (gpfifo_args->num_inflight_jobs) {
1180 err = channel_gk20a_prealloc_resources(c,
1181 gpfifo_args->num_inflight_jobs);
1182 if (err)
1183 goto clean_up_sync;
1184 }
1185
1186 err = channel_gk20a_alloc_priv_cmdbuf(c);
1187 if (err)
1188 goto clean_up_prealloc;
1189
1190 err = channel_gk20a_update_runlist(c, true);
1191 if (err)
1192 goto clean_up_priv_cmd;
1193
1194 g->ops.fifo.bind_channel(c);
1195
1196 nvgpu_log_fn(g, "done");
1197 return 0;
1198
1199clean_up_priv_cmd:
1200 channel_gk20a_free_priv_cmdbuf(c);
1201clean_up_prealloc:
1202 if (gpfifo_args->num_inflight_jobs)
1203 channel_gk20a_free_prealloc_resources(c);
1204clean_up_sync:
1205 if (c->sync) {
1206 gk20a_channel_sync_destroy(c->sync, false);
1207 c->sync = NULL;
1208 }
1209clean_up_unmap:
1210 nvgpu_big_free(g, c->gpfifo.pipe);
1211 nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem);
1212clean_up_usermode:
1213 if (c->usermode_submit_enabled) {
1214 gk20a_channel_free_usermode_buffers(c);
1215 c->userd_iova = nvgpu_mem_get_addr(g, &g->fifo.userd) +
1216 c->chid * g->fifo.userd_entry_size;
1217 c->usermode_submit_enabled = false;
1218 }
1219clean_up:
1220 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1221clean_up_idle:
1222 if (c->deterministic) {
1223 nvgpu_rwsem_down_read(&g->deterministic_busy);
1224 gk20a_idle(g);
1225 c->deterministic = false;
1226 nvgpu_rwsem_up_read(&g->deterministic_busy);
1227 }
1228 nvgpu_err(g, "fail");
1229 return err;
1230}
1231
1232void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c)
1233{
1234 if (nvgpu_mem_is_valid(&c->usermode_userd))
1235 nvgpu_dma_free(c->g, &c->usermode_userd);
1236}
1237
 1238/* Call this periodically to update gp_get and see how the gpfifo is draining. */
1239static inline u32 update_gp_get(struct gk20a *g,
1240 struct channel_gk20a *c)
1241{
1242 u32 new_get = g->ops.fifo.userd_gp_get(g, c);
1243
1244 if (new_get < c->gpfifo.get)
1245 c->gpfifo.wrap = !c->gpfifo.wrap;
1246 c->gpfifo.get = new_get;
1247 return new_get;
1248}
1249
1250u32 nvgpu_gp_free_count(struct channel_gk20a *c)
1251{
1252 return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1253 c->gpfifo.entry_num;
1254}
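/*
 * Worked example (editorial, illustrative numbers only): with
 * entry_num = 1024, put = 10 and get = 4, the expression above yields
 * (1024 - 6 - 1) % 1024 = 1017 free entries; one entry is always kept
 * unused so that put == get unambiguously means an empty gpfifo.
 */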
1255
1256bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1257 u32 timeout_delta_ms, bool *progress)
1258{
1259 u32 gpfifo_get = update_gp_get(ch->g, ch);
1260
1261 /* Count consequent timeout isr */
1262 if (gpfifo_get == ch->timeout_gpfifo_get) {
1263 /* we didn't advance since previous channel timeout check */
1264 ch->timeout_accumulated_ms += timeout_delta_ms;
1265 *progress = false;
1266 } else {
1267 /* first timeout isr encountered */
1268 ch->timeout_accumulated_ms = timeout_delta_ms;
1269 *progress = true;
1270 }
1271
1272 ch->timeout_gpfifo_get = gpfifo_get;
1273
1274 return nvgpu_is_timeouts_enabled(ch->g) &&
1275 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1276}
1277
1278u32 nvgpu_get_gp_free_count(struct channel_gk20a *c)
1279{
1280 update_gp_get(c->g, c);
1281 return nvgpu_gp_free_count(c);
1282}
1283
1284static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
1285{
1286 ch->timeout.gp_get = ch->g->ops.fifo.userd_gp_get(ch->g, ch);
1287 ch->timeout.pb_get = ch->g->ops.fifo.userd_pb_get(ch->g, ch);
1288 ch->timeout.running = true;
1289 nvgpu_timeout_init(ch->g, &ch->timeout.timer,
1290 ch->timeout.limit_ms,
1291 NVGPU_TIMER_CPU_TIMER);
1292}
1293
1294/**
1295 * Start a timeout counter (watchdog) on this channel.
1296 *
1297 * Trigger a watchdog to recover the channel after the per-platform timeout
1298 * duration (but strictly no earlier) if the channel hasn't advanced within
1299 * that time.
1300 *
1301 * If the timeout is already running, do nothing. This should be called when
1302 * new jobs are submitted. The timeout will stop when the last tracked job
1303 * finishes, making the channel idle.
1304 *
 1305 * The channel's gpfifo read pointer will be used to determine whether the job
 1306 * is actually stuck at that time. After the timeout duration has expired, a
 1307 * worker thread will check whether the channel is really stuck and recover it if so.
1308 */
1309static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
1310{
1311 if (!nvgpu_is_timeouts_enabled(ch->g))
1312 return;
1313
1314 if (!ch->timeout.enabled)
1315 return;
1316
1317 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1318
1319 if (ch->timeout.running) {
1320 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1321 return;
1322 }
1323 __gk20a_channel_timeout_start(ch);
1324 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1325}
1326
1327/**
1328 * Stop a running timeout counter (watchdog) on this channel.
1329 *
1330 * Make the watchdog consider the channel not running, so that it won't get
1331 * recovered even if no progress is detected. Progress is not tracked if the
1332 * watchdog is turned off.
1333 *
1334 * No guarantees are made about concurrent execution of the timeout handler.
1335 * (This should be called from an update handler running in the same thread
1336 * with the watchdog.)
1337 */
1338static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
1339{
1340 bool was_running;
1341
1342 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1343 was_running = ch->timeout.running;
1344 ch->timeout.running = false;
1345 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1346 return was_running;
1347}
1348
1349/**
1350 * Continue a previously stopped timeout
1351 *
1352 * Enable the timeout again but don't reinitialize its timer.
1353 *
1354 * No guarantees are made about concurrent execution of the timeout handler.
1355 * (This should be called from an update handler running in the same thread
1356 * with the watchdog.)
1357 */
1358static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
1359{
1360 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1361 ch->timeout.running = true;
1362 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1363}
1364
1365/**
1366 * Rewind the timeout on each non-dormant channel.
1367 *
1368 * Reschedule the timeout of each active channel for which timeouts are running
 1369 * as if something had happened on each channel right now. This should be
1370 * called when a global hang is detected that could cause a false positive on
1371 * other innocent channels.
1372 */
1373void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
1374{
1375 struct fifo_gk20a *f = &g->fifo;
1376 u32 chid;
1377
1378 for (chid = 0; chid < f->num_channels; chid++) {
1379 struct channel_gk20a *ch = &f->channel[chid];
1380
1381 if (!gk20a_channel_get(ch))
1382 continue;
1383
1384 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1385 if (ch->timeout.running)
1386 __gk20a_channel_timeout_start(ch);
1387 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1388
1389 gk20a_channel_put(ch);
1390 }
1391}
1392
1393/**
1394 * Check if a timed out channel has hung and recover it if it has.
1395 *
1396 * Test if this channel has really got stuck at this point by checking if its
1397 * {gp,pb}_get has advanced or not. If no {gp,pb}_get action happened since
1398 * when the watchdog was started and it's timed out, force-reset the channel.
1399 *
1400 * The gpu is implicitly on at this point, because the watchdog can only run on
1401 * channels that have submitted jobs pending for cleanup.
1402 */
1403static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
1404{
1405 struct gk20a *g = ch->g;
1406 u32 gp_get;
1407 u32 new_gp_get;
1408 u64 pb_get;
1409 u64 new_pb_get;
1410
1411 nvgpu_log_fn(g, " ");
1412
1413 /* Get status but keep timer running */
1414 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1415 gp_get = ch->timeout.gp_get;
1416 pb_get = ch->timeout.pb_get;
1417 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1418
1419 new_gp_get = g->ops.fifo.userd_gp_get(ch->g, ch);
1420 new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch);
1421
1422 if (new_gp_get != gp_get || new_pb_get != pb_get) {
1423 /* Channel has advanced, rewind timer */
1424 gk20a_channel_timeout_stop(ch);
1425 gk20a_channel_timeout_start(ch);
1426 return;
1427 }
1428
1429 if (!nvgpu_timeout_peek_expired(&ch->timeout.timer)) {
1430 /* Seems stuck but waiting to time out */
1431 return;
1432 }
1433
1434 nvgpu_err(g, "Job on channel %d timed out",
1435 ch->chid);
1436
1437 /* force reset calls gk20a_debug_dump but not this */
1438 if (ch->timeout.debug_dump)
1439 gk20a_gr_debug_dump(g);
1440
1441 g->ops.fifo.force_reset_ch(ch,
1442 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
1443 ch->timeout.debug_dump);
1444}
1445
1446/**
1447 * Test if the per-channel watchdog is on; check the timeout in that case.
1448 *
1449 * Each channel has an expiration time based watchdog. The timer is
1450 * (re)initialized in two situations: when a new job is submitted on an idle
1451 * channel and when the timeout is checked but progress is detected. The
1452 * watchdog timeout limit is a coarse sliding window.
1453 *
1454 * The timeout is stopped (disabled) after the last job in a row finishes
1455 * and marks the channel idle.
1456 */
1457static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
1458{
1459 bool running;
1460
1461 nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
1462 running = ch->timeout.running;
1463 nvgpu_raw_spinlock_release(&ch->timeout.lock);
1464
1465 if (running)
1466 gk20a_channel_timeout_handler(ch);
1467}
1468
1469/**
1470 * Loop every living channel, check timeouts and handle stuck channels.
1471 */
1472static void gk20a_channel_poll_timeouts(struct gk20a *g)
1473{
1474 unsigned int chid;
1475
1476
1477 for (chid = 0; chid < g->fifo.num_channels; chid++) {
1478 struct channel_gk20a *ch = &g->fifo.channel[chid];
1479
1480 if (gk20a_channel_get(ch)) {
1481 gk20a_channel_timeout_check(ch);
1482 gk20a_channel_put(ch);
1483 }
1484 }
1485}
1486
1487/*
1488 * Process one scheduled work item for this channel. Currently, the only thing
1489 * the worker does is job cleanup handling.
1490 */
1491static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch)
1492{
1493 nvgpu_log_fn(ch->g, " ");
1494
1495 gk20a_channel_clean_up_jobs(ch, true);
1496
1497 /* ref taken when enqueued */
1498 gk20a_channel_put(ch);
1499}
1500
1501/**
 1502 * Tell the worker that one more work item needs to be done.
1503 *
1504 * Increase the work counter to synchronize the worker with the new work. Wake
1505 * up the worker. If the worker was already running, it will handle this work
1506 * before going to sleep.
1507 */
1508static int __gk20a_channel_worker_wakeup(struct gk20a *g)
1509{
1510 int put;
1511
1512 nvgpu_log_fn(g, " ");
1513
1514 /*
1515 * Currently, the only work type is associated with a lock, which deals
1516 * with any necessary barriers. If a work type with no locking were
1517 * added, a nvgpu_smp_wmb() would be needed here. See
1518 * ..worker_pending() for a pair.
1519 */
1520
1521 put = nvgpu_atomic_inc_return(&g->channel_worker.put);
1522 nvgpu_cond_signal_interruptible(&g->channel_worker.wq);
1523
1524 return put;
1525}
1526
1527/**
1528 * Test if there is some work pending.
1529 *
1530 * This is a pair for __gk20a_channel_worker_wakeup to be called from the
1531 * worker. The worker has an internal work counter which is incremented once
1532 * per finished work item. This is compared with the number of queued jobs,
1533 * which may be channels on the items list or any other types of work.
1534 */
1535static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
1536{
1537 bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get;
1538
1539 /*
1540 * This would be the place for a nvgpu_smp_rmb() pairing
1541 * a nvgpu_smp_wmb() for a wakeup if we had any work with
1542 * no implicit barriers caused by locking.
1543 */
1544
1545 return pending;
1546}
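/*
 * Editorial note (illustrative numbers): ..worker_wakeup() bumps the shared
 * put counter once per enqueued work item, while each worker loop iteration
 * advances its private get. After, say, three enqueues put == 3; a worker
 * whose get == 2 therefore still sees pending work and keeps processing.
 */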
1547
1548/**
1549 * Process the queued works for the worker thread serially.
1550 *
1551 * Flush all the work items in the queue one by one. This may block timeout
1552 * handling for a short while, as these are serialized.
1553 */
1554static void gk20a_channel_worker_process(struct gk20a *g, int *get)
1555{
1556
1557 while (__gk20a_channel_worker_pending(g, *get)) {
1558 struct channel_gk20a *ch = NULL;
1559
1560 /*
1561 * If a channel is on the list, it's guaranteed to be handled
1562 * eventually just once. However, the opposite is not true. A
 1563 * channel may be being processed whether it's on the list or not.
1564 *
1565 * With this, processing channel works should be conservative
1566 * as follows: it's always safe to look at a channel found in
1567 * the list, and if someone enqueues the channel, it will be
1568 * handled eventually, even if it's being handled at the same
1569 * time. A channel is on the list only once; multiple calls to
1570 * enqueue are harmless.
1571 */
1572 nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
1573 if (!nvgpu_list_empty(&g->channel_worker.items)) {
1574 ch = nvgpu_list_first_entry(&g->channel_worker.items,
1575 channel_gk20a,
1576 worker_item);
1577 nvgpu_list_del(&ch->worker_item);
1578 }
1579 nvgpu_spinlock_release(&g->channel_worker.items_lock);
1580
1581 if (!ch) {
1582 /*
 1583 * Woke up for some other reason, but currently the only
 1584 * expected reason is a channel being added to the items
 1585 * list, so warn and ack the message.
1586 */
1587 nvgpu_warn(g, "Spurious worker event!");
1588 ++*get;
1589 break;
1590 }
1591
1592 gk20a_channel_worker_process_ch(ch);
1593 ++*get;
1594 }
1595}
1596
1597/*
1598 * Look at channel states periodically, until canceled. Abort timed out
1599 * channels serially. Process all work items found in the queue.
1600 */
1601static int gk20a_channel_poll_worker(void *arg)
1602{
1603 struct gk20a *g = (struct gk20a *)arg;
1604 struct gk20a_worker *worker = &g->channel_worker;
1605 unsigned long watchdog_interval = 100; /* milliseconds */
1606 struct nvgpu_timeout timeout;
1607 int get = 0;
1608
1609 nvgpu_log_fn(g, " ");
1610
1611 nvgpu_timeout_init(g, &timeout, watchdog_interval,
1612 NVGPU_TIMER_CPU_TIMER);
1613 while (!nvgpu_thread_should_stop(&worker->poll_task)) {
1614 int ret;
1615
1616 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
1617 &worker->wq,
1618 __gk20a_channel_worker_pending(g, get),
1619 watchdog_interval);
1620
1621 if (ret == 0)
1622 gk20a_channel_worker_process(g, &get);
1623
1624 if (nvgpu_timeout_peek_expired(&timeout)) {
1625 gk20a_channel_poll_timeouts(g);
1626 nvgpu_timeout_init(g, &timeout, watchdog_interval,
1627 NVGPU_TIMER_CPU_TIMER);
1628 }
1629 }
1630 return 0;
1631}
1632
1633static int __nvgpu_channel_worker_start(struct gk20a *g)
1634{
1635 char thread_name[64];
1636 int err = 0;
1637
1638 if (nvgpu_thread_is_running(&g->channel_worker.poll_task))
1639 return err;
1640
1641 nvgpu_mutex_acquire(&g->channel_worker.start_lock);
1642
1643 /*
1644 * We don't want to grab a mutex on every channel update so we check
1645 * again if the worker has been initialized before creating a new thread
1646 */
1647
1648 /*
1649 * Mutexes have implicit barriers, so there is no risk of a thread
1650 * having a stale copy of the poll_task variable as the call to
1651 * thread_is_running is volatile
1652 */
1653
1654 if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) {
1655 nvgpu_mutex_release(&g->channel_worker.start_lock);
1656 return err;
1657 }
1658
1659 snprintf(thread_name, sizeof(thread_name),
1660 "nvgpu_channel_poll_%s", g->name);
1661
1662 err = nvgpu_thread_create(&g->channel_worker.poll_task, g,
1663 gk20a_channel_poll_worker, thread_name);
1664
1665 nvgpu_mutex_release(&g->channel_worker.start_lock);
1666 return err;
1667}
1668/**
1669 * Initialize the channel worker's metadata and start the background thread.
1670 */
1671int nvgpu_channel_worker_init(struct gk20a *g)
1672{
1673 int err;
1674
1675 nvgpu_atomic_set(&g->channel_worker.put, 0);
1676 nvgpu_cond_init(&g->channel_worker.wq);
1677 nvgpu_init_list_node(&g->channel_worker.items);
1678 nvgpu_spinlock_init(&g->channel_worker.items_lock);
1679 err = nvgpu_mutex_init(&g->channel_worker.start_lock);
1680 if (err)
1681 goto error_check;
1682
1683 err = __nvgpu_channel_worker_start(g);
1684error_check:
1685 if (err) {
1686 nvgpu_err(g, "failed to start channel poller thread");
1687 return err;
1688 }
1689 return 0;
1690}
1691
1692void nvgpu_channel_worker_deinit(struct gk20a *g)
1693{
1694 nvgpu_mutex_acquire(&g->channel_worker.start_lock);
1695 nvgpu_thread_stop(&g->channel_worker.poll_task);
1696 nvgpu_mutex_release(&g->channel_worker.start_lock);
1697}
1698
1699/**
1700 * Append a channel to the worker's list, if not there already.
1701 *
1702 * The worker thread processes work items (channels in its work list) and polls
1703 * for other things. This adds @ch to the end of the list and wakes the worker
1704 * up immediately. If the channel already existed in the list, it's not added,
1705 * because in that case it has been scheduled already but has not yet been
1706 * processed.
1707 */
1708static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
1709{
1710 struct gk20a *g = ch->g;
1711
1712 nvgpu_log_fn(g, " ");
1713
1714 /*
1715 * Warn if worker thread cannot run
1716 */
1717 if (WARN_ON(__nvgpu_channel_worker_start(g))) {
1718 nvgpu_warn(g, "channel worker cannot run!");
1719 return;
1720 }
1721
1722 /*
1723 * Ref released when this item gets processed. The caller should hold
1724 * one ref already, so normally shouldn't fail, but the channel could
1725 * end up being freed between the time the caller got its reference and
1726 * the time we end up here (e.g., if the client got killed); if so, just
1727 * return.
1728 */
1729 if (!gk20a_channel_get(ch)) {
1730 nvgpu_info(g, "cannot get ch ref for worker!");
1731 return;
1732 }
1733
1734 nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
1735 if (!nvgpu_list_empty(&ch->worker_item)) {
1736 /*
1737 * Already queued, so will get processed eventually.
1738 * The worker is probably awake already.
1739 */
1740 nvgpu_spinlock_release(&g->channel_worker.items_lock);
1741 gk20a_channel_put(ch);
1742 return;
1743 }
1744 nvgpu_list_add_tail(&ch->worker_item, &g->channel_worker.items);
1745 nvgpu_spinlock_release(&g->channel_worker.items_lock);
1746
1747 __gk20a_channel_worker_wakeup(g);
1748}
1749
1750int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
1751{
1752 struct priv_cmd_queue *q = &c->priv_cmd_q;
1753 struct gk20a *g = c->g;
1754
1755 if (!e)
1756 return 0;
1757
1758 if (e->valid) {
1759 /* read the entry's valid flag before reading its contents */
1760 nvgpu_smp_rmb();
1761 if ((q->get != e->off) && e->off != 0)
1762 nvgpu_err(g, "requests out-of-order, ch=%d",
1763 c->chid);
1764 q->get = e->off + e->size;
1765 }
1766
1767 free_priv_cmdbuf(c, e);
1768
1769 return 0;
1770}
1771
1772int gk20a_channel_add_job(struct channel_gk20a *c,
1773 struct channel_gk20a_job *job,
1774 bool skip_buffer_refcounting)
1775{
1776 struct vm_gk20a *vm = c->vm;
1777 struct nvgpu_mapped_buf **mapped_buffers = NULL;
1778 int err = 0, num_mapped_buffers = 0;
1779 bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
1780
1781 if (!skip_buffer_refcounting) {
1782 err = nvgpu_vm_get_buffers(vm, &mapped_buffers,
1783 &num_mapped_buffers);
1784 if (err)
1785 return err;
1786 }
1787
1788 /*
1789 * Ref to hold the channel open during the job lifetime. This is
1790 * released by job cleanup launched via syncpt or sema interrupt.
1791 */
1792 c = gk20a_channel_get(c);
1793
1794 if (c) {
1795 job->num_mapped_buffers = num_mapped_buffers;
1796 job->mapped_buffers = mapped_buffers;
1797
1798 gk20a_channel_timeout_start(c);
1799
1800 if (!pre_alloc_enabled)
1801 channel_gk20a_joblist_lock(c);
1802
1803 /*
1804 * ensure all pending write complete before adding to the list.
1805 * see corresponding nvgpu_smp_rmb in
1806 * gk20a_channel_clean_up_jobs()
1807 */
1808 nvgpu_smp_wmb();
1809 channel_gk20a_joblist_add(c, job);
1810
1811 if (!pre_alloc_enabled)
1812 channel_gk20a_joblist_unlock(c);
1813 } else {
1814 err = -ETIMEDOUT;
1815 goto err_put_buffers;
1816 }
1817
1818 return 0;
1819
1820err_put_buffers:
1821 nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1822
1823 return err;
1824}
1825
1826/**
1827 * Clean up job resources for further jobs to use.
1828 * @clean_all: If true, process as many jobs as possible, otherwise just one.
1829 *
1830 * Loop all jobs from the joblist until a pending job is found, or just one if
1831 * clean_all is not set. Pending jobs are detected from the job's post fence,
1832 * so this is only done for jobs that have job tracking resources. Free all
1833 * per-job memory for completed jobs; in case of preallocated resources, this
1834 * opens up slots for new jobs to be submitted.
1835 */
1836void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
1837 bool clean_all)
1838{
1839 struct vm_gk20a *vm;
1840 struct channel_gk20a_job *job;
1841 struct gk20a *g;
1842 int job_finished = 0;
1843 bool watchdog_on = false;
1844
1845 c = gk20a_channel_get(c);
1846 if (!c)
1847 return;
1848
1849 if (!c->g->power_on) { /* shutdown case */
1850 gk20a_channel_put(c);
1851 return;
1852 }
1853
1854 vm = c->vm;
1855 g = c->g;
1856
1857 /*
1858 * If !clean_all, we're in a condition where watchdog isn't supported
1859 * anyway (this would be a no-op).
1860 */
1861 if (clean_all)
1862 watchdog_on = gk20a_channel_timeout_stop(c);
1863
1864 /* Synchronize with abort cleanup that needs the jobs. */
1865 nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
1866
1867 while (1) {
1868 bool completed;
1869
1870 channel_gk20a_joblist_lock(c);
1871 if (channel_gk20a_joblist_is_empty(c)) {
1872 /*
1873 * No jobs in flight, timeout will remain stopped until
1874 * new jobs are submitted.
1875 */
1876 channel_gk20a_joblist_unlock(c);
1877 break;
1878 }
1879
1880 /*
1881		 * Ensure that all subsequent reads occur after checking
1882		 * that we have a valid node. See the corresponding nvgpu_smp_wmb() in
1883 * gk20a_channel_add_job().
1884 */
1885 nvgpu_smp_rmb();
1886 job = channel_gk20a_joblist_peek(c);
1887 channel_gk20a_joblist_unlock(c);
1888
1889 completed = gk20a_fence_is_expired(job->post_fence);
1890 if (!completed) {
1891 /*
1892 * The watchdog eventually sees an updated gp_get if
1893 * something happened in this loop. A new job can have
1894			 * been submitted between the timeout stop call above and
1895			 * this point; in that case, this is a no-op and the newer
1896			 * timeout is still used.
1897 */
1898 if (clean_all && watchdog_on)
1899 gk20a_channel_timeout_continue(c);
1900 break;
1901 }
1902
1903 WARN_ON(!c->sync);
1904
1905 if (c->sync) {
1906 if (c->has_os_fence_framework_support &&
1907 g->os_channel.os_fence_framework_inst_exists(c))
1908 g->os_channel.signal_os_fence_framework(c);
1909
1910 if (g->aggressive_sync_destroy_thresh) {
1911 nvgpu_mutex_acquire(&c->sync_lock);
1912 if (nvgpu_atomic_dec_and_test(
1913 &c->sync->refcount) &&
1914 g->aggressive_sync_destroy) {
1915 gk20a_channel_sync_destroy(c->sync,
1916 false);
1917 c->sync = NULL;
1918 }
1919 nvgpu_mutex_release(&c->sync_lock);
1920 }
1921 }
1922
1923 if (job->num_mapped_buffers)
1924 nvgpu_vm_put_buffers(vm, job->mapped_buffers,
1925 job->num_mapped_buffers);
1926
1927 /* Remove job from channel's job list before we close the
1928 * fences, to prevent other callers (gk20a_channel_abort) from
1929 * trying to dereference post_fence when it no longer exists.
1930 */
1931 channel_gk20a_joblist_lock(c);
1932 channel_gk20a_joblist_delete(c, job);
1933 channel_gk20a_joblist_unlock(c);
1934
1935 /* Close the fence (this will unref the semaphore and release
1936 * it to the pool). */
1937 gk20a_fence_put(job->post_fence);
1938
1939 /* Free the private command buffers (wait_cmd first and
1940		 * then incr_cmd, i.e. in order of allocation). */
1941 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
1942 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
1943
1944		/* Drop the extra ref taken in gk20a_channel_add_job(); the caller
1945		 * must hold its own ref, so the channel won't get freed here. */
1946 gk20a_channel_put(c);
1947
1948 /*
1949		 * Ensure all pending writes complete before freeing up the job.
1950		 * See the corresponding nvgpu_smp_rmb() in channel_gk20a_alloc_job().
1951 */
1952 nvgpu_smp_wmb();
1953
1954 channel_gk20a_free_job(c, job);
1955 job_finished = 1;
1956
1957 /*
1958 * Deterministic channels have a channel-wide power reference;
1959 * for others, there's one per submit.
1960 */
1961 if (!c->deterministic)
1962 gk20a_idle(g);
1963
1964 if (!clean_all) {
1965 /* Timeout isn't supported here so don't touch it. */
1966 break;
1967 }
1968 }
1969
1970 nvgpu_mutex_release(&c->joblist.cleanup_lock);
1971
1972 if (job_finished && g->os_channel.work_completion_signal)
1973 g->os_channel.work_completion_signal(c);
1974
1975 gk20a_channel_put(c);
1976}
1977
1978/**
1979 * Schedule a job cleanup work on this channel to free resources and to signal
1980 * about completion.
1981 *
1982 * Call this when there has been an interrupt about finished jobs, or when job
1983 * cleanup needs to be performed, e.g., when closing a channel. This is always
1984 * safe to call even if there is nothing to clean up. Any visible actions on
1985 * jobs just before calling this are guaranteed to be processed.
1986 */
1987void gk20a_channel_update(struct channel_gk20a *c)
1988{
1989 if (!c->g->power_on) { /* shutdown case */
1990 return;
1991 }
1992
1993 trace_gk20a_channel_update(c->chid);
1994 /* A queued channel is always checked for job cleanup. */
1995 gk20a_channel_worker_enqueue(c);
1996}
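/*
 * Editorial sketch (not part of this change): a minimal caller of
 * gk20a_channel_update(), e.g. from a hypothetical completion path. It is
 * safe to call even when there is nothing to clean up; the actual work is
 * deferred to the channel worker.
 */
static void example_on_channel_work_completed(struct channel_gk20a *c)
{
	/* Queue the channel for job cleanup and completion signalling. */
	gk20a_channel_update(c);
}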
1997
1998/*
1999 * Stop deterministic channel activity for do_idle() when power needs to go off
2000 * momentarily but deterministic channels keep power refs for potentially a
2001 * long time.
2002 *
2003 * Takes write access on g->deterministic_busy.
2004 *
2005 * Must be paired with gk20a_channel_deterministic_unidle().
2006 */
2007void gk20a_channel_deterministic_idle(struct gk20a *g)
2008{
2009 struct fifo_gk20a *f = &g->fifo;
2010 u32 chid;
2011
2012 /* Grab exclusive access to the hw to block new submits */
2013 nvgpu_rwsem_down_write(&g->deterministic_busy);
2014
2015 for (chid = 0; chid < f->num_channels; chid++) {
2016 struct channel_gk20a *ch = &f->channel[chid];
2017
2018 if (!gk20a_channel_get(ch))
2019 continue;
2020
2021 if (ch->deterministic && !ch->deterministic_railgate_allowed) {
2022 /*
2023 * Drop the power ref taken when setting deterministic
2024 * flag. deterministic_unidle will put this and the
2025 * channel ref back. If railgate is allowed separately
2026 * for this channel, the power ref has already been put
2027 * away.
2028 *
2029 * Hold the channel ref: it must not get freed in
2030 * between. A race could otherwise result in lost
2031 * gk20a_busy() via unidle, and in unbalanced
2032 * gk20a_idle() via closing the channel.
2033 */
2034 gk20a_idle(g);
2035 } else {
2036 /* Not interesting, carry on. */
2037 gk20a_channel_put(ch);
2038 }
2039 }
2040}
2041
2042/*
2043 * Allow deterministic channel activity again for do_unidle().
2044 *
2045 * This releases write access on g->deterministic_busy.
2046 */
2047void gk20a_channel_deterministic_unidle(struct gk20a *g)
2048{
2049 struct fifo_gk20a *f = &g->fifo;
2050 u32 chid;
2051
2052 for (chid = 0; chid < f->num_channels; chid++) {
2053 struct channel_gk20a *ch = &f->channel[chid];
2054
2055 if (!gk20a_channel_get(ch))
2056 continue;
2057
2058 /*
2059		 * Deterministic state only changes under the deterministic_busy
2060		 * lock, which we took in deterministic_idle().
2061 */
2062 if (ch->deterministic && !ch->deterministic_railgate_allowed) {
2063 if (gk20a_busy(g))
2064 nvgpu_err(g, "cannot busy() again!");
2065 /* Took this in idle() */
2066 gk20a_channel_put(ch);
2067 }
2068
2069 gk20a_channel_put(ch);
2070 }
2071
2072 /* Release submits, new deterministic channels and frees */
2073 nvgpu_rwsem_up_write(&g->deterministic_busy);
2074}
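/*
 * Editorial sketch (not part of this change): the expected pairing of the two
 * calls above around a temporary power-down, assuming a hypothetical
 * do_idle()-style caller. deterministic_idle() keeps g->deterministic_busy
 * write-locked (blocking new submits) until the matching unidle() call.
 */
static void example_do_idle_window(struct gk20a *g)
{
	gk20a_channel_deterministic_idle(g);

	/* ... rail-gate or otherwise power off and back on here ... */

	gk20a_channel_deterministic_unidle(g);
}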
2075
2076int gk20a_init_channel_support(struct gk20a *g, u32 chid)
2077{
2078	struct channel_gk20a *c = g->fifo.channel + chid;
2079 int err;
2080
2081 c->g = NULL;
2082 c->chid = chid;
2083 nvgpu_atomic_set(&c->bound, false);
2084 nvgpu_spinlock_init(&c->ref_obtain_lock);
2085 nvgpu_atomic_set(&c->ref_count, 0);
2086 c->referenceable = false;
2087 nvgpu_cond_init(&c->ref_count_dec_wq);
2088
2089#if GK20A_CHANNEL_REFCOUNT_TRACKING
2090 nvgpu_spinlock_init(&c->ref_actions_lock);
2091#endif
2092 nvgpu_spinlock_init(&c->joblist.dynamic.lock);
2093 nvgpu_raw_spinlock_init(&c->timeout.lock);
2094
2095 nvgpu_init_list_node(&c->joblist.dynamic.jobs);
2096 nvgpu_init_list_node(&c->dbg_s_list);
2097 nvgpu_init_list_node(&c->worker_item);
2098
2099 err = nvgpu_mutex_init(&c->ioctl_lock);
2100 if (err)
2101 return err;
2102 err = nvgpu_mutex_init(&c->joblist.cleanup_lock);
2103 if (err)
2104 goto fail_1;
2105 err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
2106 if (err)
2107 goto fail_2;
2108 err = nvgpu_mutex_init(&c->sync_lock);
2109 if (err)
2110 goto fail_3;
2111#if defined(CONFIG_GK20A_CYCLE_STATS)
2112 err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
2113 if (err)
2114 goto fail_4;
2115 err = nvgpu_mutex_init(&c->cs_client_mutex);
2116 if (err)
2117 goto fail_5;
2118#endif
2119 err = nvgpu_mutex_init(&c->dbg_s_lock);
2120 if (err)
2121 goto fail_6;
2122
2123 nvgpu_list_add(&c->free_chs, &g->fifo.free_chs);
2124
2125 return 0;
2126
2127fail_6:
2128#if defined(CONFIG_GK20A_CYCLE_STATS)
2129 nvgpu_mutex_destroy(&c->cs_client_mutex);
2130fail_5:
2131 nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
2132fail_4:
2133#endif
2134 nvgpu_mutex_destroy(&c->sync_lock);
2135fail_3:
2136 nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
2137fail_2:
2138 nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
2139fail_1:
2140 nvgpu_mutex_destroy(&c->ioctl_lock);
2141
2142 return err;
2143}
2144
2145/* In this context the "channel" is the host1x channel, which
2146 * maps to *all* gk20a channels. */
2147int gk20a_channel_suspend(struct gk20a *g)
2148{
2149 struct fifo_gk20a *f = &g->fifo;
2150 u32 chid;
2151 bool channels_in_use = false;
2152 u32 active_runlist_ids = 0;
2153
2154 nvgpu_log_fn(g, " ");
2155
2156 for (chid = 0; chid < f->num_channels; chid++) {
2157 struct channel_gk20a *ch = &f->channel[chid];
2158 if (gk20a_channel_get(ch)) {
2159 nvgpu_log_info(g, "suspend channel %d", chid);
2160 /* disable channel */
2161 gk20a_disable_channel_tsg(g, ch);
2162 /* preempt the channel */
2163 gk20a_fifo_preempt(g, ch);
2164 /* wait for channel update notifiers */
2165 if (g->os_channel.work_completion_cancel_sync)
2166 g->os_channel.work_completion_cancel_sync(ch);
2167
2168 channels_in_use = true;
2169
2170 active_runlist_ids |= BIT(ch->runlist_id);
2171
2172 gk20a_channel_put(ch);
2173 }
2174 }
2175
2176 if (channels_in_use) {
2177 gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true);
2178
2179 for (chid = 0; chid < f->num_channels; chid++) {
2180 if (gk20a_channel_get(&f->channel[chid])) {
2181 g->ops.fifo.unbind_channel(&f->channel[chid]);
2182 gk20a_channel_put(&f->channel[chid]);
2183 }
2184 }
2185 }
2186
2187 nvgpu_log_fn(g, "done");
2188 return 0;
2189}
2190
2191int gk20a_channel_resume(struct gk20a *g)
2192{
2193 struct fifo_gk20a *f = &g->fifo;
2194 u32 chid;
2195 bool channels_in_use = false;
2196 u32 active_runlist_ids = 0;
2197
2198 nvgpu_log_fn(g, " ");
2199
2200 for (chid = 0; chid < f->num_channels; chid++) {
2201 if (gk20a_channel_get(&f->channel[chid])) {
2202 nvgpu_log_info(g, "resume channel %d", chid);
2203 g->ops.fifo.bind_channel(&f->channel[chid]);
2204 channels_in_use = true;
2205 active_runlist_ids |= BIT(f->channel[chid].runlist_id);
2206 gk20a_channel_put(&f->channel[chid]);
2207 }
2208 }
2209
2210 if (channels_in_use)
2211 gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, true, true);
2212
2213 nvgpu_log_fn(g, "done");
2214 return 0;
2215}
2216
2217void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
2218{
2219 struct fifo_gk20a *f = &g->fifo;
2220 u32 chid;
2221
2222 nvgpu_log_fn(g, " ");
2223
2224 /*
2225 * Ensure that all pending writes are actually done before trying to
2226 * read semaphore values from DRAM.
2227 */
2228 g->ops.mm.fb_flush(g);
2229
2230 for (chid = 0; chid < f->num_channels; chid++) {
2231		struct channel_gk20a *c = g->fifo.channel + chid;
2232 if (gk20a_channel_get(c)) {
2233 if (nvgpu_atomic_read(&c->bound)) {
2234 nvgpu_cond_broadcast_interruptible(
2235 &c->semaphore_wq);
2236 if (post_events) {
2237 if (gk20a_is_channel_marked_as_tsg(c)) {
2238 struct tsg_gk20a *tsg =
2239 &g->fifo.tsg[c->tsgid];
2240
2241 g->ops.fifo.post_event_id(tsg,
2242 NVGPU_EVENT_ID_BLOCKING_SYNC);
2243 }
2244 }
2245 /*
2246				 * Only non-deterministic channels get the
2247				 * channel_update callback. We don't allow
2248				 * semaphore-backed syncs for these channels
2249				 * anyway, since they have a dependency on
2250				 * the sync framework.
2251				 * If deterministic channels are receiving
2252				 * a semaphore wakeup here, it must be for
2253				 * a semaphore that is managed directly by
2254				 * user space.
2255 */
2256 if (!c->deterministic)
2257 gk20a_channel_update(c);
2258 }
2259 gk20a_channel_put(c);
2260 }
2261 }
2262}