Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2425 |
1 file changed, 2425 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
new file mode 100644
index 00000000..4be232f1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -0,0 +1,2425 @@
1 | /* | ||
2 | * GK20A Graphics channel | ||
3 | * | ||
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <trace/events/gk20a.h> | ||
26 | #include <uapi/linux/nvgpu.h> | ||
27 | |||
28 | #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_GK20A_CYCLE_STATS) | ||
29 | #include <linux/dma-buf.h> | ||
30 | #endif | ||
31 | |||
32 | #include <nvgpu/semaphore.h> | ||
33 | #include <nvgpu/timers.h> | ||
34 | #include <nvgpu/kmem.h> | ||
35 | #include <nvgpu/dma.h> | ||
36 | #include <nvgpu/log.h> | ||
37 | #include <nvgpu/atomic.h> | ||
38 | #include <nvgpu/bug.h> | ||
39 | #include <nvgpu/list.h> | ||
40 | #include <nvgpu/circ_buf.h> | ||
41 | #include <nvgpu/cond.h> | ||
42 | #include <nvgpu/enabled.h> | ||
43 | #include <nvgpu/debug.h> | ||
44 | #include <nvgpu/ltc.h> | ||
45 | #include <nvgpu/barrier.h> | ||
46 | #include <nvgpu/ctxsw_trace.h> | ||
47 | |||
48 | #include "gk20a.h" | ||
49 | #include "dbg_gpu_gk20a.h" | ||
50 | #include "fence_gk20a.h" | ||
51 | |||
52 | static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); | ||
53 | static void gk20a_channel_dump_ref_actions(struct channel_gk20a *c); | ||
54 | |||
55 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); | ||
56 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); | ||
57 | |||
58 | static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c); | ||
59 | |||
60 | static void channel_gk20a_joblist_add(struct channel_gk20a *c, | ||
61 | struct channel_gk20a_job *job); | ||
62 | static void channel_gk20a_joblist_delete(struct channel_gk20a *c, | ||
63 | struct channel_gk20a_job *job); | ||
64 | static struct channel_gk20a_job *channel_gk20a_joblist_peek( | ||
65 | struct channel_gk20a *c); | ||
66 | |||
67 | static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch); | ||
68 | |||
69 | /* allocate GPU channel */ | ||
70 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) | ||
71 | { | ||
72 | struct channel_gk20a *ch = NULL; | ||
73 | struct gk20a *g = f->g; | ||
74 | |||
75 | nvgpu_mutex_acquire(&f->free_chs_mutex); | ||
76 | if (!nvgpu_list_empty(&f->free_chs)) { | ||
77 | ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a, | ||
78 | free_chs); | ||
79 | nvgpu_list_del(&ch->free_chs); | ||
80 | WARN_ON(nvgpu_atomic_read(&ch->ref_count)); | ||
81 | WARN_ON(ch->referenceable); | ||
82 | f->used_channels++; | ||
83 | } | ||
84 | nvgpu_mutex_release(&f->free_chs_mutex); | ||
85 | |||
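| /* | ||
| * When the number of channels in use crosses this threshold, the driver | ||
| * turns on g->aggressive_sync_destroy so that per-channel sync objects are | ||
| * torn down as soon as they go unused (behaviour implemented elsewhere in | ||
| * the driver); the matching check in free_channel() clears the flag again | ||
| * when usage drops back below the threshold. | ||
| */ | ||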
86 | if (g->aggressive_sync_destroy_thresh && | ||
87 | (f->used_channels > | ||
88 | g->aggressive_sync_destroy_thresh)) | ||
89 | g->aggressive_sync_destroy = true; | ||
90 | |||
91 | return ch; | ||
92 | } | ||
93 | |||
94 | static void free_channel(struct fifo_gk20a *f, | ||
95 | struct channel_gk20a *ch) | ||
96 | { | ||
97 | struct gk20a *g = f->g; | ||
98 | |||
99 | trace_gk20a_release_used_channel(ch->chid); | ||
100 | /* refcount is zero here and channel is in a freed/dead state */ | ||
101 | nvgpu_mutex_acquire(&f->free_chs_mutex); | ||
102 | /* add to head to increase visibility of timing-related bugs */ | ||
103 | nvgpu_list_add(&ch->free_chs, &f->free_chs); | ||
104 | f->used_channels--; | ||
105 | nvgpu_mutex_release(&f->free_chs_mutex); | ||
106 | |||
107 | /* | ||
108 | * On teardown it is not possible to dereference platform, but ignoring | ||
109 | * this is fine because no new channels can be created at that point. | ||
110 | */ | ||
111 | if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { | ||
112 | if (g->aggressive_sync_destroy_thresh && | ||
113 | (f->used_channels < | ||
114 | g->aggressive_sync_destroy_thresh)) | ||
115 | g->aggressive_sync_destroy = false; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | int channel_gk20a_commit_va(struct channel_gk20a *c) | ||
120 | { | ||
121 | struct gk20a *g = c->g; | ||
122 | |||
123 | gk20a_dbg_fn(""); | ||
124 | |||
125 | g->ops.mm.init_inst_block(&c->inst_block, c->vm, | ||
126 | c->vm->gmmu_page_sizes[gmmu_page_size_big]); | ||
127 | |||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | u32 gk20a_channel_get_timeslice(struct channel_gk20a *ch) | ||
132 | { | ||
133 | struct gk20a *g = ch->g; | ||
134 | |||
135 | if (!ch->timeslice_us) | ||
136 | return g->ops.fifo.default_timeslice_us(g); | ||
137 | |||
138 | return ch->timeslice_us; | ||
139 | } | ||
140 | |||
141 | int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | ||
142 | int timeslice_period, | ||
143 | int *__timeslice_timeout, int *__timeslice_scale) | ||
144 | { | ||
145 | int value = scale_ptimer(timeslice_period, | ||
146 | ptimer_scalingfactor10x(g->ptimer_src_freq)); | ||
147 | int shift = 0; | ||
148 | |||
149 | /* value field is 8 bits long */ | ||
150 | while (value >= 1 << 8) { | ||
151 | value >>= 1; | ||
152 | shift++; | ||
153 | } | ||
154 | |||
155 | /* time slice register is only 18 bits long */ | ||
156 | if ((value << shift) >= 1<<19) { | ||
157 | pr_err("Requested timeslice value is clamped to 18 bits\n"); | ||
158 | value = 255; | ||
159 | shift = 10; | ||
160 | } | ||
161 | |||
162 | *__timeslice_timeout = value; | ||
163 | *__timeslice_scale = shift; | ||
164 | |||
165 | return 0; | ||
166 | } | ||
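| /* | ||
| * Illustrative arithmetic for the split above (numbers assumed for the | ||
| * example, not taken from this change): if scale_ptimer() yields 1000, the | ||
| * loop halves it to 500 (shift 1) and then 250 (shift 2), which fits in | ||
| * 8 bits, so the encoded pair is timeout = 250, scale = 2, representing | ||
| * roughly 250 << 2 = 1000. | ||
| */ | ||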
167 | |||
168 | int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add) | ||
169 | { | ||
170 | return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->chid, add, true); | ||
171 | } | ||
172 | |||
173 | int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) | ||
174 | { | ||
175 | struct tsg_gk20a *tsg; | ||
176 | |||
177 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
178 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
179 | g->ops.fifo.enable_tsg(tsg); | ||
180 | } else { | ||
181 | g->ops.fifo.enable_channel(ch); | ||
182 | } | ||
183 | |||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch) | ||
188 | { | ||
189 | struct tsg_gk20a *tsg; | ||
190 | |||
191 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
192 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
193 | g->ops.fifo.disable_tsg(tsg); | ||
194 | } else { | ||
195 | g->ops.fifo.disable_channel(ch); | ||
196 | } | ||
197 | |||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | ||
202 | { | ||
203 | struct channel_gk20a_job *job, *n; | ||
204 | bool released_job_semaphore = false; | ||
205 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch); | ||
206 | |||
207 | /* synchronize with actual job cleanup */ | ||
208 | nvgpu_mutex_acquire(&ch->joblist.cleanup_lock); | ||
209 | |||
210 | /* ensure no fences are pending */ | ||
211 | nvgpu_mutex_acquire(&ch->sync_lock); | ||
212 | if (ch->sync) | ||
213 | ch->sync->set_min_eq_max(ch->sync); | ||
214 | nvgpu_mutex_release(&ch->sync_lock); | ||
215 | |||
216 | /* release all job semaphores (applies only to jobs that use | ||
217 | semaphore synchronization) */ | ||
218 | channel_gk20a_joblist_lock(ch); | ||
219 | if (pre_alloc_enabled) { | ||
220 | int tmp_get = ch->joblist.pre_alloc.get; | ||
221 | int put = ch->joblist.pre_alloc.put; | ||
222 | |||
223 | /* | ||
224 | * ensure put is read before any subsequent reads. | ||
225 | * see corresponding nvgpu_smp_wmb in gk20a_channel_add_job() | ||
226 | */ | ||
227 | nvgpu_smp_rmb(); | ||
228 | |||
229 | while (tmp_get != put) { | ||
230 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; | ||
231 | if (job->post_fence->semaphore) { | ||
232 | __nvgpu_semaphore_release( | ||
233 | job->post_fence->semaphore, true); | ||
234 | released_job_semaphore = true; | ||
235 | } | ||
236 | tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length; | ||
237 | } | ||
238 | } else { | ||
239 | nvgpu_list_for_each_entry_safe(job, n, | ||
240 | &ch->joblist.dynamic.jobs, | ||
241 | channel_gk20a_job, list) { | ||
242 | if (job->post_fence->semaphore) { | ||
243 | __nvgpu_semaphore_release( | ||
244 | job->post_fence->semaphore, true); | ||
245 | released_job_semaphore = true; | ||
246 | } | ||
247 | } | ||
248 | } | ||
249 | channel_gk20a_joblist_unlock(ch); | ||
250 | |||
251 | nvgpu_mutex_release(&ch->joblist.cleanup_lock); | ||
252 | |||
253 | if (released_job_semaphore) | ||
254 | nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); | ||
255 | |||
256 | /* | ||
257 | * When closing the channel, this scheduled update holds one ref which | ||
258 | * is waited for before advancing with freeing. | ||
259 | */ | ||
260 | gk20a_channel_update(ch); | ||
261 | } | ||
262 | |||
263 | void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) | ||
264 | { | ||
265 | gk20a_dbg_fn(""); | ||
266 | |||
267 | if (gk20a_is_channel_marked_as_tsg(ch)) | ||
268 | return gk20a_fifo_abort_tsg(ch->g, ch->tsgid, channel_preempt); | ||
269 | |||
270 | /* make sure new kickoffs are prevented */ | ||
271 | ch->has_timedout = true; | ||
272 | |||
273 | ch->g->ops.fifo.disable_channel(ch); | ||
274 | |||
275 | if (channel_preempt && ch->ch_ctx.gr_ctx) | ||
276 | ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); | ||
277 | |||
278 | gk20a_channel_abort_clean_up(ch); | ||
279 | } | ||
280 | |||
281 | int gk20a_wait_channel_idle(struct channel_gk20a *ch) | ||
282 | { | ||
283 | bool channel_idle = false; | ||
284 | struct nvgpu_timeout timeout; | ||
285 | |||
286 | nvgpu_timeout_init(ch->g, &timeout, gk20a_get_gr_idle_timeout(ch->g), | ||
287 | NVGPU_TIMER_CPU_TIMER); | ||
288 | |||
289 | do { | ||
290 | channel_gk20a_joblist_lock(ch); | ||
291 | channel_idle = channel_gk20a_joblist_is_empty(ch); | ||
292 | channel_gk20a_joblist_unlock(ch); | ||
293 | if (channel_idle) | ||
294 | break; | ||
295 | |||
296 | nvgpu_usleep_range(1000, 3000); | ||
297 | } while (!nvgpu_timeout_expired(&timeout)); | ||
298 | |||
299 | if (!channel_idle) { | ||
300 | nvgpu_err(ch->g, "jobs not freed for channel %d", | ||
301 | ch->chid); | ||
302 | return -EBUSY; | ||
303 | } | ||
304 | |||
305 | return 0; | ||
306 | } | ||
307 | |||
308 | void gk20a_disable_channel(struct channel_gk20a *ch) | ||
309 | { | ||
310 | gk20a_channel_abort(ch, true); | ||
311 | channel_gk20a_update_runlist(ch, false); | ||
312 | } | ||
313 | |||
314 | int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch, | ||
315 | u32 level) | ||
316 | { | ||
317 | struct gk20a *g = ch->g; | ||
318 | int ret; | ||
319 | |||
320 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
321 | nvgpu_err(g, "invalid operation for TSG!"); | ||
322 | return -EINVAL; | ||
323 | } | ||
324 | |||
325 | switch (level) { | ||
326 | case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW: | ||
327 | case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: | ||
328 | case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH: | ||
329 | ret = g->ops.fifo.set_runlist_interleave(g, ch->chid, | ||
330 | false, 0, level); | ||
331 | break; | ||
332 | default: | ||
333 | ret = -EINVAL; | ||
334 | break; | ||
335 | } | ||
336 | |||
337 | gk20a_dbg(gpu_dbg_sched, "chid=%u interleave=%u", ch->chid, level); | ||
338 | |||
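| /* The runlist is rewritten below with ~0 as the chid, i.e. no channel is | ||
| * added or removed; the rewrite is what makes the new interleave level take | ||
| * effect (this reading of the invalid-chid convention is an assumption). */ | ||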
339 | return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true); | ||
340 | } | ||
341 | |||
342 | /** | ||
343 | * gk20a_set_error_notifier_locked() | ||
344 | * Should be called with ch->error_notifier_mutex held | ||
345 | */ | ||
346 | void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error) | ||
347 | { | ||
348 | if (ch->error_notifier_ref) { | ||
349 | struct timespec time_data; | ||
350 | u64 nsec; | ||
351 | getnstimeofday(&time_data); | ||
352 | nsec = ((u64)time_data.tv_sec) * 1000000000u + | ||
353 | (u64)time_data.tv_nsec; | ||
354 | ch->error_notifier->time_stamp.nanoseconds[0] = | ||
355 | (u32)nsec; | ||
356 | ch->error_notifier->time_stamp.nanoseconds[1] = | ||
357 | (u32)(nsec >> 32); | ||
358 | ch->error_notifier->info32 = error; | ||
359 | ch->error_notifier->status = 0xffff; | ||
360 | |||
361 | nvgpu_err(ch->g, | ||
362 | "error notifier set to %d for ch %d", error, ch->chid); | ||
363 | } | ||
364 | } | ||
365 | |||
366 | void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) | ||
367 | { | ||
368 | nvgpu_mutex_acquire(&ch->error_notifier_mutex); | ||
369 | gk20a_set_error_notifier_locked(ch, error); | ||
370 | nvgpu_mutex_release(&ch->error_notifier_mutex); | ||
371 | } | ||
372 | |||
373 | static void gk20a_wait_until_counter_is_N( | ||
374 | struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value, | ||
375 | struct nvgpu_cond *c, const char *caller, const char *counter_name) | ||
376 | { | ||
377 | while (true) { | ||
378 | if (NVGPU_COND_WAIT( | ||
379 | c, | ||
380 | nvgpu_atomic_read(counter) == wait_value, | ||
381 | 5000) == 0) | ||
382 | break; | ||
383 | |||
384 | nvgpu_warn(ch->g, | ||
385 | "%s: channel %d, still waiting, %s left: %d, waiting for: %d", | ||
386 | caller, ch->chid, counter_name, | ||
387 | nvgpu_atomic_read(counter), wait_value); | ||
388 | |||
389 | gk20a_channel_dump_ref_actions(ch); | ||
390 | } | ||
391 | } | ||
392 | |||
393 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
394 | void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch) | ||
395 | { | ||
396 | /* disable existing cyclestats buffer */ | ||
397 | nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); | ||
398 | if (ch->cyclestate.cyclestate_buffer_handler) { | ||
399 | dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler, | ||
400 | ch->cyclestate.cyclestate_buffer); | ||
401 | dma_buf_put(ch->cyclestate.cyclestate_buffer_handler); | ||
402 | ch->cyclestate.cyclestate_buffer_handler = NULL; | ||
403 | ch->cyclestate.cyclestate_buffer = NULL; | ||
404 | ch->cyclestate.cyclestate_buffer_size = 0; | ||
405 | } | ||
406 | nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); | ||
407 | } | ||
408 | |||
409 | int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) | ||
410 | { | ||
411 | int ret; | ||
412 | |||
413 | nvgpu_mutex_acquire(&ch->cs_client_mutex); | ||
414 | if (ch->cs_client) { | ||
415 | ret = gr_gk20a_css_detach(ch, ch->cs_client); | ||
416 | ch->cs_client = NULL; | ||
417 | } else { | ||
418 | ret = 0; | ||
419 | } | ||
420 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
421 | |||
422 | return ret; | ||
423 | } | ||
424 | |||
425 | #endif | ||
426 | |||
427 | /* call ONLY when no references to the channel exist: after the last put */ | ||
428 | static void gk20a_free_channel(struct channel_gk20a *ch, bool force) | ||
429 | { | ||
430 | struct gk20a *g = ch->g; | ||
431 | struct fifo_gk20a *f = &g->fifo; | ||
432 | struct gr_gk20a *gr = &g->gr; | ||
433 | struct vm_gk20a *ch_vm = ch->vm; | ||
434 | unsigned long timeout = gk20a_get_gr_idle_timeout(g); | ||
435 | struct dbg_session_gk20a *dbg_s; | ||
436 | struct dbg_session_data *session_data, *tmp_s; | ||
437 | struct dbg_session_channel_data *ch_data, *tmp; | ||
438 | bool was_tsg = false; | ||
439 | int err; | ||
440 | |||
441 | gk20a_dbg_fn(""); | ||
442 | |||
443 | WARN_ON(ch->g == NULL); | ||
444 | |||
445 | trace_gk20a_free_channel(ch->chid); | ||
446 | |||
447 | /* | ||
448 | * Disable channel/TSG and unbind here. This should not be executed if | ||
449 | * HW access is not available during shutdown/removal path as it will | ||
450 | * trigger a timeout | ||
451 | */ | ||
452 | if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { | ||
453 | /* abort channel and remove from runlist */ | ||
454 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
455 | err = g->ops.fifo.tsg_unbind_channel(ch); | ||
456 | if (err) | ||
457 | nvgpu_err(g, | ||
458 | "failed to unbind channel %d from TSG", | ||
459 | ch->chid); | ||
460 | /* | ||
461 | * The channel is no longer part of a TSG from this point onwards, | ||
462 | * so stash its former TSG status and use it wherever necessary, | ||
463 | * e.g. while releasing gr_ctx in | ||
464 | * g->ops.gr.free_channel_ctx() | ||
465 | */ | ||
466 | was_tsg = true; | ||
467 | } else { | ||
468 | gk20a_disable_channel(ch); | ||
469 | } | ||
470 | } | ||
471 | /* wait until there's only our ref to the channel */ | ||
472 | if (!force) | ||
473 | gk20a_wait_until_counter_is_N( | ||
474 | ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, | ||
475 | __func__, "references"); | ||
476 | |||
477 | /* wait until all pending interrupts for recently completed | ||
478 | * jobs are handled */ | ||
479 | nvgpu_wait_for_deferred_interrupts(g); | ||
480 | |||
481 | /* prevent new refs */ | ||
482 | nvgpu_spinlock_acquire(&ch->ref_obtain_lock); | ||
483 | if (!ch->referenceable) { | ||
484 | nvgpu_spinlock_release(&ch->ref_obtain_lock); | ||
485 | nvgpu_err(ch->g, | ||
486 | "Extra %s() called to channel %u", | ||
487 | __func__, ch->chid); | ||
488 | return; | ||
489 | } | ||
490 | ch->referenceable = false; | ||
491 | nvgpu_spinlock_release(&ch->ref_obtain_lock); | ||
492 | |||
493 | /* matches with the initial reference in gk20a_open_new_channel() */ | ||
494 | nvgpu_atomic_dec(&ch->ref_count); | ||
495 | |||
496 | /* wait until no more refs to the channel */ | ||
497 | if (!force) | ||
498 | gk20a_wait_until_counter_is_N( | ||
499 | ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, | ||
500 | __func__, "references"); | ||
501 | |||
502 | /* if engine reset was deferred, perform it now */ | ||
503 | nvgpu_mutex_acquire(&f->deferred_reset_mutex); | ||
504 | if (g->fifo.deferred_reset_pending) { | ||
505 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" | ||
506 | " deferred, running now"); | ||
507 | /* if lock is already taken, a reset is taking place | ||
508 | so no need to repeat */ | ||
509 | if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) { | ||
510 | gk20a_fifo_deferred_reset(g, ch); | ||
511 | nvgpu_mutex_release(&g->fifo.gr_reset_mutex); | ||
512 | } | ||
513 | } | ||
514 | nvgpu_mutex_release(&f->deferred_reset_mutex); | ||
515 | |||
516 | if (!gk20a_channel_as_bound(ch)) | ||
517 | goto unbind; | ||
518 | |||
519 | gk20a_dbg_info("freeing bound channel context, timeout=%ld", | ||
520 | timeout); | ||
521 | |||
522 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
523 | if (g->ops.fecs_trace.unbind_channel && !ch->vpr) | ||
524 | g->ops.fecs_trace.unbind_channel(g, ch); | ||
525 | #endif | ||
526 | |||
527 | /* release channel ctx */ | ||
528 | g->ops.gr.free_channel_ctx(ch, was_tsg); | ||
529 | |||
530 | gk20a_gr_flush_channel_tlb(gr); | ||
531 | |||
532 | nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem); | ||
533 | nvgpu_big_free(g, ch->gpfifo.pipe); | ||
534 | memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); | ||
535 | |||
536 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
537 | gk20a_channel_free_cycle_stats_buffer(ch); | ||
538 | gk20a_channel_free_cycle_stats_snapshot(ch); | ||
539 | #endif | ||
540 | |||
541 | channel_gk20a_free_priv_cmdbuf(ch); | ||
542 | |||
543 | /* sync must be destroyed before releasing channel vm */ | ||
544 | nvgpu_mutex_acquire(&ch->sync_lock); | ||
545 | if (ch->sync) { | ||
546 | gk20a_channel_sync_destroy(ch->sync); | ||
547 | ch->sync = NULL; | ||
548 | } | ||
549 | nvgpu_mutex_release(&ch->sync_lock); | ||
550 | |||
551 | /* | ||
552 | * free the channel used semaphore index. | ||
553 | * we need to do this before releasing the address space, | ||
554 | * as the semaphore pool might get freed after that point. | ||
555 | */ | ||
556 | if (ch->hw_sema) | ||
557 | nvgpu_semaphore_free_hw_sema(ch); | ||
558 | |||
559 | /* | ||
560 | * When releasing the channel we unbind the VM - so release the ref. | ||
561 | */ | ||
562 | nvgpu_vm_put(ch_vm); | ||
563 | |||
564 | nvgpu_spinlock_acquire(&ch->update_fn_lock); | ||
565 | ch->update_fn = NULL; | ||
566 | ch->update_fn_data = NULL; | ||
567 | nvgpu_spinlock_release(&ch->update_fn_lock); | ||
568 | cancel_work_sync(&ch->update_fn_work); | ||
569 | |||
570 | /* make sure we don't have deferred interrupts pending that | ||
571 | * could still touch the channel */ | ||
572 | nvgpu_wait_for_deferred_interrupts(g); | ||
573 | |||
574 | unbind: | ||
575 | g->ops.fifo.unbind_channel(ch); | ||
576 | g->ops.fifo.free_inst(g, ch); | ||
577 | |||
578 | /* put back the channel-wide submit ref from init */ | ||
579 | if (ch->deterministic) { | ||
580 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
581 | ch->deterministic = false; | ||
582 | if (!ch->deterministic_railgate_allowed) | ||
583 | gk20a_idle(g); | ||
584 | ch->deterministic_railgate_allowed = false; | ||
585 | |||
586 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
587 | } | ||
588 | |||
589 | ch->vpr = false; | ||
590 | ch->vm = NULL; | ||
591 | |||
592 | WARN_ON(ch->sync); | ||
593 | |||
594 | /* unlink all debug sessions */ | ||
595 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
596 | |||
597 | list_for_each_entry_safe(session_data, tmp_s, | ||
598 | &ch->dbg_s_list, dbg_s_entry) { | ||
599 | dbg_s = session_data->dbg_s; | ||
600 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
601 | list_for_each_entry_safe(ch_data, tmp, | ||
602 | &dbg_s->ch_list, ch_entry) { | ||
603 | if (ch_data->chid == ch->chid) | ||
604 | ch_data->unbind_single_channel(dbg_s, ch_data); | ||
605 | } | ||
606 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
607 | } | ||
608 | |||
609 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
610 | |||
611 | /* free pre-allocated resources, if applicable */ | ||
612 | if (channel_gk20a_is_prealloc_enabled(ch)) | ||
613 | channel_gk20a_free_prealloc_resources(ch); | ||
614 | |||
615 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | ||
616 | memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); | ||
617 | ch->ref_actions_put = 0; | ||
618 | #endif | ||
619 | |||
620 | /* make sure we catch accesses of unopened channels in case | ||
621 | * there are non-refcounted channel pointers hanging around */ | ||
622 | ch->g = NULL; | ||
623 | nvgpu_smp_wmb(); | ||
624 | |||
625 | /* ALWAYS last */ | ||
626 | free_channel(f, ch); | ||
627 | } | ||
628 | |||
629 | static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch) | ||
630 | { | ||
631 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | ||
632 | size_t i, get; | ||
633 | s64 now = nvgpu_current_time_ms(); | ||
634 | s64 prev = 0; | ||
635 | struct device *dev = dev_from_gk20a(ch->g); | ||
636 | |||
637 | nvgpu_spinlock_acquire(&ch->ref_actions_lock); | ||
638 | |||
639 | dev_info(dev, "ch %d: refs %d. Actions, most recent last:\n", | ||
640 | ch->chid, nvgpu_atomic_read(&ch->ref_count)); | ||
641 | |||
642 | /* start at the oldest possible entry. put is next insertion point */ | ||
643 | get = ch->ref_actions_put; | ||
644 | |||
645 | /* | ||
646 | * If the buffer is not full, this will first loop to the oldest entry, | ||
647 | * skipping not-yet-initialized entries. There is no ref_actions_get. | ||
648 | */ | ||
649 | for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { | ||
650 | struct channel_gk20a_ref_action *act = &ch->ref_actions[get]; | ||
651 | |||
652 | if (act->trace.nr_entries) { | ||
653 | dev_info(dev, "%s ref %zu steps ago (age %d ms, diff %d ms)\n", | ||
654 | act->type == channel_gk20a_ref_action_get | ||
655 | ? "GET" : "PUT", | ||
656 | GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, | ||
657 | now - act->timestamp_ms, | ||
658 | act->timestamp_ms - prev); | ||
659 | |||
660 | print_stack_trace(&act->trace, 0); | ||
661 | prev = act->timestamp_ms; | ||
662 | } | ||
663 | |||
664 | get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; | ||
665 | } | ||
666 | |||
667 | nvgpu_spinlock_release(&ch->ref_actions_lock); | ||
668 | #endif | ||
669 | } | ||
670 | |||
671 | static void gk20a_channel_save_ref_source(struct channel_gk20a *ch, | ||
672 | enum channel_gk20a_ref_action_type type) | ||
673 | { | ||
674 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | ||
675 | struct channel_gk20a_ref_action *act; | ||
676 | |||
677 | nvgpu_spinlock_acquire(&ch->ref_actions_lock); | ||
678 | |||
679 | act = &ch->ref_actions[ch->ref_actions_put]; | ||
680 | act->type = type; | ||
681 | act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; | ||
682 | act->trace.nr_entries = 0; | ||
683 | act->trace.skip = 3; /* onwards from the caller of this */ | ||
684 | act->trace.entries = act->trace_entries; | ||
685 | save_stack_trace(&act->trace); | ||
686 | act->timestamp_ms = nvgpu_current_time_ms(); | ||
687 | ch->ref_actions_put = (ch->ref_actions_put + 1) % | ||
688 | GK20A_CHANNEL_REFCOUNT_TRACKING; | ||
689 | |||
690 | nvgpu_spinlock_release(&ch->ref_actions_lock); | ||
691 | #endif | ||
692 | } | ||
693 | |||
694 | /* Try to get a reference to the channel. Return a non-NULL pointer on | ||
695 | * success; on failure the channel is dead or being freed and must not be touched. | ||
696 | * | ||
697 | * Whenever a channel_gk20a pointer is seen and about to be used, a | ||
698 | * reference must be held to it, either by you or by the caller; this should | ||
699 | * be documented well or otherwise clearly visible. It usually boils down to | ||
700 | * the file from ioctls directly, or an explicit get in exception handlers | ||
701 | * when the channel is found by a chid. | ||
702 | * | ||
703 | * Most global functions in this file require a reference to be held by the | ||
704 | * caller. | ||
705 | */ | ||
706 | struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, | ||
707 | const char *caller) { | ||
708 | struct channel_gk20a *ret; | ||
709 | |||
710 | nvgpu_spinlock_acquire(&ch->ref_obtain_lock); | ||
711 | |||
712 | if (likely(ch->referenceable)) { | ||
713 | gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); | ||
714 | nvgpu_atomic_inc(&ch->ref_count); | ||
715 | ret = ch; | ||
716 | } else | ||
717 | ret = NULL; | ||
718 | |||
719 | nvgpu_spinlock_release(&ch->ref_obtain_lock); | ||
720 | |||
721 | if (ret) | ||
722 | trace_gk20a_channel_get(ch->chid, caller); | ||
723 | |||
724 | return ret; | ||
725 | } | ||
726 | |||
727 | void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller) | ||
728 | { | ||
729 | gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); | ||
730 | trace_gk20a_channel_put(ch->chid, caller); | ||
731 | nvgpu_atomic_dec(&ch->ref_count); | ||
732 | nvgpu_cond_broadcast(&ch->ref_count_dec_wq); | ||
733 | |||
734 | /* More puts than gets. Channel is probably going to get | ||
735 | * stuck. */ | ||
736 | WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); | ||
737 | |||
738 | /* Also, more puts than gets. ref_count can go to 0 only if | ||
739 | * the channel is closing. Channel is probably going to get | ||
740 | * stuck. */ | ||
741 | WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); | ||
742 | } | ||
743 | |||
744 | void gk20a_channel_close(struct channel_gk20a *ch) | ||
745 | { | ||
746 | gk20a_free_channel(ch, false); | ||
747 | } | ||
748 | |||
749 | /* | ||
750 | * Be careful with this - it is meant for terminating channels when we know the | ||
751 | * driver is otherwise dying. Ref counts and the like are ignored by this | ||
752 | * version of the cleanup. | ||
753 | */ | ||
754 | void __gk20a_channel_kill(struct channel_gk20a *ch) | ||
755 | { | ||
756 | gk20a_free_channel(ch, true); | ||
757 | } | ||
758 | |||
759 | static void gk20a_channel_update_runcb_fn(struct work_struct *work) | ||
760 | { | ||
761 | struct channel_gk20a *ch = | ||
762 | container_of(work, struct channel_gk20a, update_fn_work); | ||
763 | void (*update_fn)(struct channel_gk20a *, void *); | ||
764 | void *update_fn_data; | ||
765 | |||
766 | nvgpu_spinlock_acquire(&ch->update_fn_lock); | ||
767 | update_fn = ch->update_fn; | ||
768 | update_fn_data = ch->update_fn_data; | ||
769 | nvgpu_spinlock_release(&ch->update_fn_lock); | ||
770 | |||
771 | if (update_fn) | ||
772 | update_fn(ch, update_fn_data); | ||
773 | } | ||
774 | |||
775 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | ||
776 | void (*update_fn)(struct channel_gk20a *, void *), | ||
777 | void *update_fn_data, | ||
778 | int runlist_id, | ||
779 | bool is_privileged_channel) | ||
780 | { | ||
781 | struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel); | ||
782 | |||
783 | if (ch) { | ||
784 | nvgpu_spinlock_acquire(&ch->update_fn_lock); | ||
785 | ch->update_fn = update_fn; | ||
786 | ch->update_fn_data = update_fn_data; | ||
787 | nvgpu_spinlock_release(&ch->update_fn_lock); | ||
788 | } | ||
789 | |||
790 | return ch; | ||
791 | } | ||
792 | |||
793 | struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, | ||
794 | s32 runlist_id, | ||
795 | bool is_privileged_channel) | ||
796 | { | ||
797 | struct fifo_gk20a *f = &g->fifo; | ||
798 | struct channel_gk20a *ch; | ||
799 | struct gk20a_event_id_data *event_id_data, *event_id_data_temp; | ||
800 | |||
801 | /* compatibility with existing code */ | ||
802 | if (!gk20a_fifo_is_valid_runlist_id(g, runlist_id)) { | ||
803 | runlist_id = gk20a_fifo_get_gr_runlist_id(g); | ||
804 | } | ||
805 | |||
806 | gk20a_dbg_fn(""); | ||
807 | |||
808 | ch = allocate_channel(f); | ||
809 | if (ch == NULL) { | ||
810 | /* TBD: we want to make this virtualizable */ | ||
811 | nvgpu_err(g, "out of hw chids"); | ||
812 | return NULL; | ||
813 | } | ||
814 | |||
815 | trace_gk20a_open_new_channel(ch->chid); | ||
816 | |||
817 | BUG_ON(ch->g); | ||
818 | ch->g = g; | ||
819 | |||
820 | /* Runlist for the channel */ | ||
821 | ch->runlist_id = runlist_id; | ||
822 | |||
823 | /* Channel privilege level */ | ||
824 | ch->is_privileged_channel = is_privileged_channel; | ||
825 | |||
826 | if (g->ops.fifo.alloc_inst(g, ch)) { | ||
827 | ch->g = NULL; | ||
828 | free_channel(f, ch); | ||
829 | nvgpu_err(g, | ||
830 | "failed to open gk20a channel, out of inst mem"); | ||
831 | return NULL; | ||
832 | } | ||
833 | |||
834 | /* now the channel is in limbo: off the free list but not yet marked as | ||
835 | * alive and used (i.e. get-able) */ | ||
836 | |||
837 | ch->pid = current->pid; | ||
838 | ch->tgid = current->tgid; /* process granularity for FECS traces */ | ||
839 | |||
840 | /* unhook all events created on this channel */ | ||
841 | nvgpu_mutex_acquire(&ch->event_id_list_lock); | ||
842 | nvgpu_list_for_each_entry_safe(event_id_data, event_id_data_temp, | ||
843 | &ch->event_id_list, | ||
844 | gk20a_event_id_data, | ||
845 | event_id_node) { | ||
846 | nvgpu_list_del(&event_id_data->event_id_node); | ||
847 | } | ||
848 | nvgpu_mutex_release(&ch->event_id_list_lock); | ||
849 | |||
850 | /* By default, channel is regular (non-TSG) channel */ | ||
851 | ch->tsgid = NVGPU_INVALID_TSG_ID; | ||
852 | |||
853 | /* reset timeout counter and update timestamp */ | ||
854 | ch->timeout_accumulated_ms = 0; | ||
855 | ch->timeout_gpfifo_get = 0; | ||
856 | /* set gr host default timeout */ | ||
857 | ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g); | ||
858 | ch->timeout_debug_dump = true; | ||
859 | ch->has_timedout = false; | ||
860 | ch->wdt_enabled = true; | ||
861 | ch->obj_class = 0; | ||
862 | ch->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; | ||
863 | ch->timeslice_us = g->timeslice_low_priority_us; | ||
864 | #ifdef CONFIG_TEGRA_19x_GPU | ||
865 | memset(&ch->t19x, 0, sizeof(struct channel_t19x)); | ||
866 | #endif | ||
867 | |||
868 | |||
869 | /* The channel is *not* runnable at this point. It still needs to have | ||
870 | * an address space bound and a gpfifo and grctx allocated. */ | ||
871 | |||
872 | nvgpu_cond_init(&ch->notifier_wq); | ||
873 | nvgpu_cond_init(&ch->semaphore_wq); | ||
874 | |||
875 | ch->update_fn = NULL; | ||
876 | ch->update_fn_data = NULL; | ||
877 | nvgpu_spinlock_init(&ch->update_fn_lock); | ||
878 | INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn); | ||
879 | |||
880 | /* Mark the channel alive, get-able, with one initial use | ||
881 | * reference. The initial reference will be decreased in | ||
882 | * gk20a_free_channel() */ | ||
883 | ch->referenceable = true; | ||
884 | nvgpu_atomic_set(&ch->ref_count, 1); | ||
885 | nvgpu_smp_wmb(); | ||
886 | |||
887 | return ch; | ||
888 | } | ||
889 | |||
890 | /* allocate private cmd buffer. | ||
891 | used for inserting commands before/after user submitted buffers. */ | ||
892 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c) | ||
893 | { | ||
894 | struct gk20a *g = c->g; | ||
895 | struct vm_gk20a *ch_vm = c->vm; | ||
896 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
897 | u32 size; | ||
898 | int err = 0; | ||
899 | |||
900 | /* | ||
901 | * Compute the amount of priv_cmdbuf space we need. In general the worst | ||
902 | * case is the kernel inserts both a semaphore pre-fence and post-fence. | ||
903 | * Any sync-pt fences will take less memory so we can ignore them for | ||
904 | * now. | ||
905 | * | ||
906 | * A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b, | ||
907 | * semaphore_c, and semaphore_d, each a two-dword method/data pair. A | ||
908 | * semaphore INCR (fence-get) is 10 dwords: the same as an ACQ plus a | ||
909 | * non-stalling intr, which is another 2 dwords. | ||
910 | * | ||
911 | * Lastly the number of gpfifo entries per channel is fixed so at most | ||
912 | * we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one | ||
913 | * userspace entry, and one post-fence entry). Thus the computation is: | ||
914 | * | ||
915 | * (gpfifo entry number) * (2 / 3) * (8 + 10) * 4 bytes. | ||
916 | */ | ||
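| /* | ||
| * Worked example with an assumed gpfifo size (not taken from this change): | ||
| * for 1024 gpfifo entries, 1024 * 2 * 18 * 4 / 3 = 49152 bytes, which the | ||
| * roundup_pow_of_two() below turns into a 64 KiB priv_cmd buffer. | ||
| */ | ||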
917 | size = roundup_pow_of_two(c->gpfifo.entry_num * | ||
918 | 2 * 18 * sizeof(u32) / 3); | ||
919 | |||
920 | err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem); | ||
921 | if (err) { | ||
922 | nvgpu_err(g, "%s: memory allocation failed", __func__); | ||
923 | goto clean_up; | ||
924 | } | ||
925 | |||
926 | q->size = q->mem.size / sizeof (u32); | ||
927 | |||
928 | return 0; | ||
929 | |||
930 | clean_up: | ||
931 | channel_gk20a_free_priv_cmdbuf(c); | ||
932 | return err; | ||
933 | } | ||
934 | |||
935 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c) | ||
936 | { | ||
937 | struct vm_gk20a *ch_vm = c->vm; | ||
938 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
939 | |||
940 | if (q->size == 0) | ||
941 | return; | ||
942 | |||
943 | nvgpu_dma_unmap_free(ch_vm, &q->mem); | ||
944 | |||
945 | memset(q, 0, sizeof(struct priv_cmd_queue)); | ||
946 | } | ||
947 | |||
948 | /* allocate a cmd buffer with given size. size is number of u32 entries */ | ||
949 | int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | ||
950 | struct priv_cmd_entry *e) | ||
951 | { | ||
952 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
953 | u32 free_count; | ||
954 | u32 size = orig_size; | ||
955 | |||
956 | gk20a_dbg_fn("size %d", orig_size); | ||
957 | |||
958 | if (!e) { | ||
959 | nvgpu_err(c->g, | ||
960 | "ch %d: priv cmd entry is null", | ||
961 | c->chid); | ||
962 | return -EINVAL; | ||
963 | } | ||
964 | |||
965 | /* if free space at the end is less than requested, increase the size | ||
966 | * so that the actual allocation starts from the beginning. */ | ||
967 | if (q->put + size > q->size) | ||
968 | size = orig_size + (q->size - q->put); | ||
969 | |||
970 | gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d", | ||
971 | c->chid, q->get, q->put); | ||
972 | |||
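| /* Free space in the circular queue; one entry is always left unused so | ||
| * that put == get unambiguously means "empty" rather than "full". */ | ||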
973 | free_count = (q->size - (q->put - q->get) - 1) % q->size; | ||
974 | |||
975 | if (size > free_count) | ||
976 | return -EAGAIN; | ||
977 | |||
978 | e->size = orig_size; | ||
979 | e->mem = &q->mem; | ||
980 | |||
981 | /* if we increased the size to skip the free space at the end, set put | ||
982 | * to the beginning of the cmd buffer (0) plus the original size */ | ||
983 | if (size != orig_size) { | ||
984 | e->off = 0; | ||
985 | e->gva = q->mem.gpu_va; | ||
986 | q->put = orig_size; | ||
987 | } else { | ||
988 | e->off = q->put; | ||
989 | e->gva = q->mem.gpu_va + q->put * sizeof(u32); | ||
990 | q->put = (q->put + orig_size) & (q->size - 1); | ||
991 | } | ||
992 | |||
993 | /* we already handled q->put + size > q->size so BUG_ON this */ | ||
994 | BUG_ON(q->put > q->size); | ||
995 | |||
996 | /* | ||
997 | * commit the previous writes before making the entry valid. | ||
998 | * see the corresponding nvgpu_smp_rmb() in gk20a_free_priv_cmdbuf(). | ||
999 | */ | ||
1000 | nvgpu_smp_wmb(); | ||
1001 | |||
1002 | e->valid = true; | ||
1003 | gk20a_dbg_fn("done"); | ||
1004 | |||
1005 | return 0; | ||
1006 | } | ||
1007 | |||
1008 | /* Don't call this to free an explicit cmd entry. | ||
1009 | * It doesn't update priv_cmd_queue get/put */ | ||
1010 | void free_priv_cmdbuf(struct channel_gk20a *c, | ||
1011 | struct priv_cmd_entry *e) | ||
1012 | { | ||
1013 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
1014 | memset(e, 0, sizeof(struct priv_cmd_entry)); | ||
1015 | else | ||
1016 | nvgpu_kfree(c->g, e); | ||
1017 | } | ||
1018 | |||
1019 | int channel_gk20a_alloc_job(struct channel_gk20a *c, | ||
1020 | struct channel_gk20a_job **job_out) | ||
1021 | { | ||
1022 | int err = 0; | ||
1023 | |||
1024 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1025 | int put = c->joblist.pre_alloc.put; | ||
1026 | int get = c->joblist.pre_alloc.get; | ||
1027 | |||
1028 | /* | ||
1029 | * ensure all subsequent reads happen after reading get. | ||
1030 | * see corresponding nvgpu_smp_wmb in | ||
1031 | * gk20a_channel_clean_up_jobs() | ||
1032 | */ | ||
1033 | nvgpu_smp_rmb(); | ||
1034 | |||
1035 | if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) | ||
1036 | *job_out = &c->joblist.pre_alloc.jobs[put]; | ||
1037 | else { | ||
1038 | nvgpu_warn(c->g, | ||
1039 | "out of job ringbuffer space"); | ||
1040 | err = -EAGAIN; | ||
1041 | } | ||
1042 | } else { | ||
1043 | *job_out = nvgpu_kzalloc(c->g, | ||
1044 | sizeof(struct channel_gk20a_job)); | ||
1045 | if (!*job_out) | ||
1046 | err = -ENOMEM; | ||
1047 | } | ||
1048 | |||
1049 | return err; | ||
1050 | } | ||
1051 | |||
1052 | void channel_gk20a_free_job(struct channel_gk20a *c, | ||
1053 | struct channel_gk20a_job *job) | ||
1054 | { | ||
1055 | /* | ||
1056 | * In case of pre_allocated jobs, we need to clean out | ||
1057 | * the job but maintain the pointers to the priv_cmd_entry, | ||
1058 | * since they're inherently tied to the job node. | ||
1059 | */ | ||
1060 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1061 | struct priv_cmd_entry *wait_cmd = job->wait_cmd; | ||
1062 | struct priv_cmd_entry *incr_cmd = job->incr_cmd; | ||
1063 | memset(job, 0, sizeof(*job)); | ||
1064 | job->wait_cmd = wait_cmd; | ||
1065 | job->incr_cmd = incr_cmd; | ||
1066 | } else | ||
1067 | nvgpu_kfree(c->g, job); | ||
1068 | } | ||
1069 | |||
1070 | void channel_gk20a_joblist_lock(struct channel_gk20a *c) | ||
1071 | { | ||
1072 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
1073 | nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock); | ||
1074 | else | ||
1075 | nvgpu_spinlock_acquire(&c->joblist.dynamic.lock); | ||
1076 | } | ||
1077 | |||
1078 | void channel_gk20a_joblist_unlock(struct channel_gk20a *c) | ||
1079 | { | ||
1080 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
1081 | nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock); | ||
1082 | else | ||
1083 | nvgpu_spinlock_release(&c->joblist.dynamic.lock); | ||
1084 | } | ||
1085 | |||
1086 | static struct channel_gk20a_job *channel_gk20a_joblist_peek( | ||
1087 | struct channel_gk20a *c) | ||
1088 | { | ||
1089 | int get; | ||
1090 | struct channel_gk20a_job *job = NULL; | ||
1091 | |||
1092 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1093 | if (!channel_gk20a_joblist_is_empty(c)) { | ||
1094 | get = c->joblist.pre_alloc.get; | ||
1095 | job = &c->joblist.pre_alloc.jobs[get]; | ||
1096 | } | ||
1097 | } else { | ||
1098 | if (!nvgpu_list_empty(&c->joblist.dynamic.jobs)) | ||
1099 | job = nvgpu_list_first_entry(&c->joblist.dynamic.jobs, | ||
1100 | channel_gk20a_job, list); | ||
1101 | } | ||
1102 | |||
1103 | return job; | ||
1104 | } | ||
1105 | |||
1106 | static void channel_gk20a_joblist_add(struct channel_gk20a *c, | ||
1107 | struct channel_gk20a_job *job) | ||
1108 | { | ||
1109 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1110 | c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) % | ||
1111 | (c->joblist.pre_alloc.length); | ||
1112 | } else { | ||
1113 | nvgpu_list_add_tail(&job->list, &c->joblist.dynamic.jobs); | ||
1114 | } | ||
1115 | } | ||
1116 | |||
1117 | static void channel_gk20a_joblist_delete(struct channel_gk20a *c, | ||
1118 | struct channel_gk20a_job *job) | ||
1119 | { | ||
1120 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1121 | c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) % | ||
1122 | (c->joblist.pre_alloc.length); | ||
1123 | } else { | ||
1124 | nvgpu_list_del(&job->list); | ||
1125 | } | ||
1126 | } | ||
1127 | |||
1128 | bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c) | ||
1129 | { | ||
1130 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1131 | int get = c->joblist.pre_alloc.get; | ||
1132 | int put = c->joblist.pre_alloc.put; | ||
1133 | return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length)); | ||
1134 | } | ||
1135 | |||
1136 | return nvgpu_list_empty(&c->joblist.dynamic.jobs); | ||
1137 | } | ||
1138 | |||
1139 | bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) | ||
1140 | { | ||
1141 | bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; | ||
1142 | |||
1143 | nvgpu_smp_rmb(); | ||
1144 | return pre_alloc_enabled; | ||
1145 | } | ||
1146 | |||
1147 | static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, | ||
1148 | unsigned int num_jobs) | ||
1149 | { | ||
1150 | unsigned int i; | ||
1151 | int err; | ||
1152 | size_t size; | ||
1153 | struct priv_cmd_entry *entries = NULL; | ||
1154 | |||
1155 | if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs) | ||
1156 | return -EINVAL; | ||
1157 | |||
1158 | /* | ||
1159 | * pre-allocate the job list. | ||
1160 | * since vmalloc takes an unsigned long, we need | ||
1161 | * to make sure we don't hit an overflow condition | ||
1162 | */ | ||
1163 | size = sizeof(struct channel_gk20a_job); | ||
1164 | if (num_jobs <= ULONG_MAX / size) | ||
1165 | c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g, | ||
1166 | num_jobs * size); | ||
1167 | if (!c->joblist.pre_alloc.jobs) { | ||
1168 | err = -ENOMEM; | ||
1169 | goto clean_up; | ||
1170 | } | ||
1171 | |||
1172 | /* | ||
1173 | * pre-allocate 2x priv_cmd_entry for each job up front. | ||
1174 | * since vmalloc takes an unsigned long, we need | ||
1175 | * to make sure we don't hit an overflow condition | ||
1176 | */ | ||
1177 | size = sizeof(struct priv_cmd_entry); | ||
1178 | if (num_jobs <= ULONG_MAX / (size << 1)) | ||
1179 | entries = nvgpu_vzalloc(c->g, (num_jobs << 1) * size); | ||
1180 | if (!entries) { | ||
1181 | err = -ENOMEM; | ||
1182 | goto clean_up_joblist; | ||
1183 | } | ||
1184 | |||
1185 | for (i = 0; i < num_jobs; i++) { | ||
1186 | c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i]; | ||
1187 | c->joblist.pre_alloc.jobs[i].incr_cmd = | ||
1188 | &entries[i + num_jobs]; | ||
1189 | } | ||
1190 | |||
1191 | /* pre-allocate a fence pool */ | ||
1192 | err = gk20a_alloc_fence_pool(c, num_jobs); | ||
1193 | if (err) | ||
1194 | goto clean_up_priv_cmd; | ||
1195 | |||
1196 | c->joblist.pre_alloc.length = num_jobs; | ||
1197 | |||
1198 | /* | ||
1199 | * commit the previous writes before setting the flag. | ||
1200 | * see corresponding nvgpu_smp_rmb in | ||
1201 | * channel_gk20a_is_prealloc_enabled() | ||
1202 | */ | ||
1203 | nvgpu_smp_wmb(); | ||
1204 | c->joblist.pre_alloc.enabled = true; | ||
1205 | |||
1206 | return 0; | ||
1207 | |||
1208 | clean_up_priv_cmd: | ||
1209 | nvgpu_vfree(c->g, entries); | ||
1210 | clean_up_joblist: | ||
1211 | nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); | ||
1212 | clean_up: | ||
1213 | memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc)); | ||
1214 | return err; | ||
1215 | } | ||
1216 | |||
1217 | static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) | ||
1218 | { | ||
1219 | nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd); | ||
1220 | nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); | ||
1221 | gk20a_free_fence_pool(c); | ||
1222 | |||
1223 | /* | ||
1224 | * commit the previous writes before disabling the flag. | ||
1225 | * see corresponding nvgpu_smp_rmb in | ||
1226 | * channel_gk20a_is_prealloc_enabled() | ||
1227 | */ | ||
1228 | nvgpu_smp_wmb(); | ||
1229 | c->joblist.pre_alloc.enabled = false; | ||
1230 | } | ||
1231 | |||
1232 | int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c, | ||
1233 | unsigned int num_entries, | ||
1234 | unsigned int num_inflight_jobs, | ||
1235 | u32 flags) | ||
1236 | { | ||
1237 | struct gk20a *g = c->g; | ||
1238 | struct vm_gk20a *ch_vm; | ||
1239 | u32 gpfifo_size, gpfifo_entry_size; | ||
1240 | int err = 0; | ||
1241 | unsigned long acquire_timeout; | ||
1242 | |||
1243 | gpfifo_size = num_entries; | ||
1244 | gpfifo_entry_size = nvgpu_get_gpfifo_entry_size(); | ||
1245 | |||
1246 | if (flags & NVGPU_GPFIFO_FLAGS_SUPPORT_VPR) | ||
1247 | c->vpr = true; | ||
1248 | |||
1249 | if (flags & NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC) { | ||
1250 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
1251 | /* | ||
1252 | * Railgating isn't deterministic; instead of disallowing | ||
1253 | * railgating globally, take a power refcount for this | ||
1254 | * channel's lifetime. The gk20a_idle() pair for this happens | ||
1255 | * when the channel gets freed. | ||
1256 | * | ||
1257 | * Deterministic flag and this busy must be atomic within the | ||
1258 | * busy lock. | ||
1259 | */ | ||
1260 | err = gk20a_busy(g); | ||
1261 | if (err) { | ||
1262 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1263 | return err; | ||
1264 | } | ||
1265 | |||
1266 | c->deterministic = true; | ||
1267 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1268 | } | ||
1269 | |||
1270 | /* an address space needs to have been bound at this point. */ | ||
1271 | if (!gk20a_channel_as_bound(c)) { | ||
1272 | nvgpu_err(g, | ||
1273 | "not bound to an address space at time of gpfifo" | ||
1274 | " allocation."); | ||
1275 | err = -EINVAL; | ||
1276 | goto clean_up_idle; | ||
1277 | } | ||
1278 | ch_vm = c->vm; | ||
1279 | |||
1280 | if (c->gpfifo.mem.size) { | ||
1281 | nvgpu_err(g, "channel %d: " | ||
1282 | "gpfifo already allocated", c->chid); | ||
1283 | err = -EEXIST; | ||
1284 | goto clean_up_idle; | ||
1285 | } | ||
1286 | |||
1287 | err = nvgpu_dma_alloc_map_sys(ch_vm, | ||
1288 | gpfifo_size * gpfifo_entry_size, | ||
1289 | &c->gpfifo.mem); | ||
1290 | if (err) { | ||
1291 | nvgpu_err(g, "%s: memory allocation failed", __func__); | ||
1292 | goto clean_up; | ||
1293 | } | ||
1294 | |||
1295 | if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
1296 | c->gpfifo.pipe = nvgpu_big_malloc(g, | ||
1297 | gpfifo_size * gpfifo_entry_size); | ||
1298 | if (!c->gpfifo.pipe) { | ||
1299 | err = -ENOMEM; | ||
1300 | goto clean_up_unmap; | ||
1301 | } | ||
1302 | } | ||
1303 | |||
1304 | c->gpfifo.entry_num = gpfifo_size; | ||
1305 | c->gpfifo.get = c->gpfifo.put = 0; | ||
1306 | |||
1307 | gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d", | ||
1308 | c->chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num); | ||
1309 | |||
1310 | g->ops.fifo.setup_userd(c); | ||
1311 | |||
1312 | if (!g->aggressive_sync_destroy_thresh) { | ||
1313 | nvgpu_mutex_acquire(&c->sync_lock); | ||
1314 | c->sync = gk20a_channel_sync_create(c); | ||
1315 | if (!c->sync) { | ||
1316 | err = -ENOMEM; | ||
1317 | nvgpu_mutex_release(&c->sync_lock); | ||
1318 | goto clean_up_unmap; | ||
1319 | } | ||
1320 | nvgpu_mutex_release(&c->sync_lock); | ||
1321 | |||
1322 | if (g->ops.fifo.resetup_ramfc) { | ||
1323 | err = g->ops.fifo.resetup_ramfc(c); | ||
1324 | if (err) | ||
1325 | goto clean_up_sync; | ||
1326 | } | ||
1327 | } | ||
1328 | |||
1329 | if (!c->g->timeouts_enabled || !c->wdt_enabled) | ||
1330 | acquire_timeout = 0; | ||
1331 | else | ||
1332 | acquire_timeout = gk20a_get_channel_watchdog_timeout(c); | ||
1333 | |||
1334 | err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va, | ||
1335 | c->gpfifo.entry_num, | ||
1336 | acquire_timeout, flags); | ||
1337 | if (err) | ||
1338 | goto clean_up_sync; | ||
1339 | |||
1340 | /* TBD: setup engine contexts */ | ||
1341 | |||
1342 | if (num_inflight_jobs) { | ||
1343 | err = channel_gk20a_prealloc_resources(c, | ||
1344 | num_inflight_jobs); | ||
1345 | if (err) | ||
1346 | goto clean_up_sync; | ||
1347 | } | ||
1348 | |||
1349 | err = channel_gk20a_alloc_priv_cmdbuf(c); | ||
1350 | if (err) | ||
1351 | goto clean_up_prealloc; | ||
1352 | |||
1353 | err = channel_gk20a_update_runlist(c, true); | ||
1354 | if (err) | ||
1355 | goto clean_up_priv_cmd; | ||
1356 | |||
1357 | g->ops.fifo.bind_channel(c); | ||
1358 | |||
1359 | gk20a_dbg_fn("done"); | ||
1360 | return 0; | ||
1361 | |||
1362 | clean_up_priv_cmd: | ||
1363 | channel_gk20a_free_priv_cmdbuf(c); | ||
1364 | clean_up_prealloc: | ||
1365 | if (num_inflight_jobs) | ||
1366 | channel_gk20a_free_prealloc_resources(c); | ||
1367 | clean_up_sync: | ||
1368 | if (c->sync) { | ||
1369 | gk20a_channel_sync_destroy(c->sync); | ||
1370 | c->sync = NULL; | ||
1371 | } | ||
1372 | clean_up_unmap: | ||
1373 | nvgpu_big_free(g, c->gpfifo.pipe); | ||
1374 | nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem); | ||
1375 | clean_up: | ||
1376 | memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); | ||
1377 | clean_up_idle: | ||
1378 | if (c->deterministic) { | ||
1379 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
1380 | gk20a_idle(g); | ||
1381 | c->deterministic = false; | ||
1382 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1383 | } | ||
1384 | nvgpu_err(g, "fail"); | ||
1385 | return err; | ||
1386 | } | ||
1387 | |||
1388 | /* Call this periodically to refresh gp_get and see how the gpfifo is draining. */ | ||
1389 | static inline u32 update_gp_get(struct gk20a *g, | ||
1390 | struct channel_gk20a *c) | ||
1391 | { | ||
1392 | u32 new_get = g->ops.fifo.userd_gp_get(g, c); | ||
1393 | |||
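| /* A new GP_GET numerically below the cached value means the hardware | ||
| * read pointer wrapped around the gpfifo, so flip the wrap flag. */ | ||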
1394 | if (new_get < c->gpfifo.get) | ||
1395 | c->gpfifo.wrap = !c->gpfifo.wrap; | ||
1396 | c->gpfifo.get = new_get; | ||
1397 | return new_get; | ||
1398 | } | ||
1399 | |||
1400 | u32 nvgpu_gp_free_count(struct channel_gk20a *c) | ||
1401 | { | ||
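| /* Same one-slot-reserved ring arithmetic as the priv_cmd queue: the | ||
| * result is how many gpfifo entries can still be written before put | ||
| * would catch up with get. */ | ||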
1402 | return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) % | ||
1403 | c->gpfifo.entry_num; | ||
1404 | } | ||
1405 | |||
1406 | bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, | ||
1407 | u32 timeout_delta_ms, bool *progress) | ||
1408 | { | ||
1409 | u32 gpfifo_get = update_gp_get(ch->g, ch); | ||
1410 | |||
1411 | /* Count consecutive timeout ISRs */ | ||
1412 | if (gpfifo_get == ch->timeout_gpfifo_get) { | ||
1413 | /* we didn't advance since previous channel timeout check */ | ||
1414 | ch->timeout_accumulated_ms += timeout_delta_ms; | ||
1415 | *progress = false; | ||
1416 | } else { | ||
1417 | /* first timeout isr since the channel last made progress */ | ||
1418 | ch->timeout_accumulated_ms = timeout_delta_ms; | ||
1419 | *progress = true; | ||
1420 | } | ||
1421 | |||
1422 | ch->timeout_gpfifo_get = gpfifo_get; | ||
1423 | |||
1424 | return ch->g->timeouts_enabled && | ||
1425 | ch->timeout_accumulated_ms > ch->timeout_ms_max; | ||
1426 | } | ||
1427 | |||
1428 | static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch) | ||
1429 | { | ||
1430 | return ch->g->ch_wdt_timeout_ms; | ||
1431 | } | ||
1432 | |||
1433 | u32 nvgpu_get_gp_free_count(struct channel_gk20a *c) | ||
1434 | { | ||
1435 | update_gp_get(c->g, c); | ||
1436 | return nvgpu_gp_free_count(c); | ||
1437 | } | ||
1438 | |||
1439 | static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) | ||
1440 | { | ||
1441 | ch->timeout.gp_get = ch->g->ops.fifo.userd_gp_get(ch->g, ch); | ||
1442 | ch->timeout.pb_get = ch->g->ops.fifo.userd_pb_get(ch->g, ch); | ||
1443 | ch->timeout.running = true; | ||
1444 | nvgpu_timeout_init(ch->g, &ch->timeout.timer, | ||
1445 | gk20a_get_channel_watchdog_timeout(ch), | ||
1446 | NVGPU_TIMER_CPU_TIMER); | ||
1447 | } | ||
1448 | |||
1449 | /** | ||
1450 | * Start a timeout counter (watchdog) on this channel. | ||
1451 | * | ||
1452 | * Trigger a watchdog to recover the channel after the per-platform timeout | ||
1453 | * duration (but strictly no earlier) if the channel hasn't advanced within | ||
1454 | * that time. | ||
1455 | * | ||
1456 | * If the timeout is already running, do nothing. This should be called when | ||
1457 | * new jobs are submitted. The timeout will stop when the last tracked job | ||
1458 | * finishes, making the channel idle. | ||
1459 | * | ||
1460 | * The channel's gpfifo read pointer will be used to determine whether the | ||
1461 | * job has actually got stuck by then. After the timeout duration has | ||
1462 | * expired, a worker thread will consider the channel stuck and recover it. | ||
1463 | */ | ||
1464 | static void gk20a_channel_timeout_start(struct channel_gk20a *ch) | ||
1465 | { | ||
1466 | if (!ch->g->timeouts_enabled || !gk20a_get_channel_watchdog_timeout(ch)) | ||
1467 | return; | ||
1468 | |||
1469 | if (!ch->wdt_enabled) | ||
1470 | return; | ||
1471 | |||
1472 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1473 | |||
1474 | if (ch->timeout.running) { | ||
1475 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1476 | return; | ||
1477 | } | ||
1478 | __gk20a_channel_timeout_start(ch); | ||
1479 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1480 | } | ||
1481 | |||
1482 | /** | ||
1483 | * Stop a running timeout counter (watchdog) on this channel. | ||
1484 | * | ||
1485 | * Make the watchdog consider the channel not running, so that it won't get | ||
1486 | * recovered even if no progress is detected. Progress is not tracked if the | ||
1487 | * watchdog is turned off. | ||
1488 | * | ||
1489 | * No guarantees are made about concurrent execution of the timeout handler. | ||
1490 | * (This should be called from an update handler running in the same thread | ||
1491 | * as the watchdog.) | ||
1492 | */ | ||
1493 | static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch) | ||
1494 | { | ||
1495 | bool was_running; | ||
1496 | |||
1497 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1498 | was_running = ch->timeout.running; | ||
1499 | ch->timeout.running = false; | ||
1500 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1501 | return was_running; | ||
1502 | } | ||
1503 | |||
1504 | /** | ||
1505 | * Continue a previously stopped timeout | ||
1506 | * | ||
1507 | * Enable the timeout again but don't reinitialize its timer. | ||
1508 | * | ||
1509 | * No guarantees are made about concurrent execution of the timeout handler. | ||
1510 | * (This should be called from an update handler running in the same thread | ||
1511 | * as the watchdog.) | ||
1512 | */ | ||
1513 | static void gk20a_channel_timeout_continue(struct channel_gk20a *ch) | ||
1514 | { | ||
1515 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1516 | ch->timeout.running = true; | ||
1517 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1518 | } | ||
1519 | |||
1520 | /** | ||
1521 | * Rewind the timeout on each non-dormant channel. | ||
1522 | * | ||
1523 | * Reschedule the timeout of each active channel for which timeouts are running | ||
1524 | * as if something had just happened on each channel. This should be | ||
1525 | * called when a global hang is detected that could cause a false positive on | ||
1526 | * other innocent channels. | ||
1527 | */ | ||
1528 | void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) | ||
1529 | { | ||
1530 | struct fifo_gk20a *f = &g->fifo; | ||
1531 | u32 chid; | ||
1532 | |||
1533 | for (chid = 0; chid < f->num_channels; chid++) { | ||
1534 | struct channel_gk20a *ch = &f->channel[chid]; | ||
1535 | |||
1536 | if (!gk20a_channel_get(ch)) | ||
1537 | continue; | ||
1538 | |||
1539 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1540 | if (ch->timeout.running) | ||
1541 | __gk20a_channel_timeout_start(ch); | ||
1542 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1543 | |||
1544 | gk20a_channel_put(ch); | ||
1545 | } | ||
1546 | } | ||
1547 | |||
1548 | /** | ||
1549 | * Check if a timed out channel has hung and recover it if it has. | ||
1550 | * | ||
1551 | * Test if this channel has really got stuck at this point (should be called | ||
1552 | * when the watchdog timer has expired) by checking if its gp_get and pb_get | ||
1553 | * have advanced or not. If neither has moved since the watchdog was started, | ||
1554 | * force-reset the channel. | ||
1555 | * | ||
1556 | * The gpu is implicitly on at this point, because the watchdog can only run on | ||
1557 | * channels that have submitted jobs pending for cleanup. | ||
1558 | */ | ||
1559 | static void gk20a_channel_timeout_handler(struct channel_gk20a *ch) | ||
1560 | { | ||
1561 | struct gk20a *g = ch->g; | ||
1562 | u32 gp_get; | ||
1563 | u32 new_gp_get; | ||
1564 | u64 pb_get; | ||
1565 | u64 new_pb_get; | ||
1566 | |||
1567 | gk20a_dbg_fn(""); | ||
1568 | |||
1569 | /* Get status and clear the timer */ | ||
1570 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1571 | gp_get = ch->timeout.gp_get; | ||
1572 | pb_get = ch->timeout.pb_get; | ||
1573 | ch->timeout.running = false; | ||
1574 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1575 | |||
1576 | new_gp_get = g->ops.fifo.userd_gp_get(ch->g, ch); | ||
1577 | new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch); | ||
1578 | |||
1579 | if (new_gp_get != gp_get || new_pb_get != pb_get) { | ||
1580 | /* Channel has advanced, reschedule */ | ||
1581 | gk20a_channel_timeout_start(ch); | ||
1582 | return; | ||
1583 | } | ||
1584 | |||
1585 | nvgpu_err(g, "Job on channel %d timed out", | ||
1586 | ch->chid); | ||
1587 | |||
1588 | gk20a_debug_dump(g); | ||
1589 | gk20a_gr_debug_dump(g); | ||
1590 | |||
1591 | g->ops.fifo.force_reset_ch(ch, | ||
1592 | NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); | ||
1593 | } | ||
1594 | |||
1595 | /** | ||
1596 | * Test if the per-channel timeout has expired and handle it if so. | ||
1597 | * | ||
1598 | * Each channel has an expiration-time-based watchdog. The timer is | ||
1599 | * (re)initialized in two situations: when a new job is submitted on an idle | ||
1600 | * channel and when the timeout is checked but progress is detected. | ||
1601 | * | ||
1602 | * A watchdog timeout does not necessarily mean a stuck channel, so this may | ||
1603 | * or may not cause recovery. | ||
1604 | * | ||
1605 | * The timeout is stopped (disabled) after the last job in a row finishes, | ||
1606 | * making the channel idle. | ||
1607 | */ | ||
1608 | static void gk20a_channel_timeout_check(struct channel_gk20a *ch) | ||
1609 | { | ||
1610 | bool timed_out; | ||
1611 | |||
1612 | nvgpu_raw_spinlock_acquire(&ch->timeout.lock); | ||
1613 | timed_out = ch->timeout.running && | ||
1614 | nvgpu_timeout_peek_expired(&ch->timeout.timer); | ||
1615 | nvgpu_raw_spinlock_release(&ch->timeout.lock); | ||
1616 | |||
1617 | if (timed_out) | ||
1618 | gk20a_channel_timeout_handler(ch); | ||
1619 | } | ||
1620 | |||
1621 | /** | ||
1622 | * Loop over every living channel, check timeouts, and handle stuck channels. | ||
1623 | */ | ||
1624 | static void gk20a_channel_poll_timeouts(struct gk20a *g) | ||
1625 | { | ||
1626 | unsigned int chid; | ||
1627 | |||
1628 | |||
1629 | for (chid = 0; chid < g->fifo.num_channels; chid++) { | ||
1630 | struct channel_gk20a *ch = &g->fifo.channel[chid]; | ||
1631 | |||
1632 | if (gk20a_channel_get(ch)) { | ||
1633 | gk20a_channel_timeout_check(ch); | ||
1634 | gk20a_channel_put(ch); | ||
1635 | } | ||
1636 | } | ||
1637 | } | ||
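/*
 * Editor's summary sketch (not part of the original file): how the watchdog
 * helpers above are driven by the rest of this file. The flow below is a
 * simplified overview, not an exact call graph.
 *
 *   submit path:           gk20a_channel_add_job()
 *                              -> gk20a_channel_timeout_start(ch)
 *   job cleanup path:      gk20a_channel_clean_up_jobs()
 *                              -> gk20a_channel_timeout_stop(ch)
 *                              -> gk20a_channel_timeout_continue(ch), if a
 *                                 job is still pending
 *   worker poll (~100 ms): gk20a_channel_poll_timeouts()
 *                              -> gk20a_channel_timeout_check(ch)
 *                              -> gk20a_channel_timeout_handler(ch), if the
 *                                 timer expired
 */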
1638 | |||
1639 | /* | ||
1640 | * Process one scheduled work item for this channel. Currently, the only thing | ||
1641 | * the worker does is job cleanup handling. | ||
1642 | */ | ||
1643 | static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch) | ||
1644 | { | ||
1645 | gk20a_dbg_fn(""); | ||
1646 | |||
1647 | gk20a_channel_clean_up_jobs(ch, true); | ||
1648 | |||
1649 | /* ref taken when enqueued */ | ||
1650 | gk20a_channel_put(ch); | ||
1651 | } | ||
1652 | |||
1653 | /** | ||
1654 | * Tell the worker that one more work item needs to be processed. | ||
1655 | * | ||
1656 | * Increase the work counter to synchronize the worker with the new work. Wake | ||
1657 | * up the worker. If the worker was already running, it will handle this work | ||
1658 | * before going to sleep. | ||
1659 | */ | ||
1660 | static int __gk20a_channel_worker_wakeup(struct gk20a *g) | ||
1661 | { | ||
1662 | int put; | ||
1663 | |||
1664 | gk20a_dbg_fn(""); | ||
1665 | |||
1666 | /* | ||
1667 | * Currently, the only work type is associated with a lock, which deals | ||
1668 | * with any necessary barriers. If a work type with no locking were | ||
1669 | * added, a nvgpu_smp_wmb() would be needed here. See | ||
1670 | * ..worker_pending() for a pair. | ||
1671 | */ | ||
1672 | |||
1673 | put = nvgpu_atomic_inc_return(&g->channel_worker.put); | ||
1674 | nvgpu_cond_signal_interruptible(&g->channel_worker.wq); | ||
1675 | |||
1676 | return put; | ||
1677 | } | ||
1678 | |||
1679 | /** | ||
1680 | * Test if there is some work pending. | ||
1681 | * | ||
1682 | * This is the counterpart of __gk20a_channel_worker_wakeup and is called from | ||
1683 | * the worker. The worker has an internal work counter which is incremented | ||
1684 | * once per finished work item. This is compared with the number of queued | ||
1685 | * items, which may be channels on the items list or any other type of work. | ||
1686 | */ | ||
1687 | static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) | ||
1688 | { | ||
1689 | bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; | ||
1690 | |||
1691 | /* | ||
1692 | * This would be the place for a nvgpu_smp_rmb() pairing | ||
1693 | * a nvgpu_smp_wmb() for a wakeup if we had any work with | ||
1694 | * no implicit barriers caused by locking. | ||
1695 | */ | ||
1696 | |||
1697 | return pending; | ||
1698 | } | ||
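/*
 * Editor's sketch (an illustrative assumption, not driver code): the put/get
 * pair acts as a simple event counter. Producers bump "put" and signal the
 * condition; the worker keeps a private "get" and drains work while the two
 * differ, roughly:
 *
 *   int get = 0;
 *
 *   for (;;) {
 *           wait_for(put != get);        // NVGPU_COND_WAIT_INTERRUPTIBLE
 *           while (put != get) {         // __gk20a_channel_worker_pending()
 *                   process_one_item();
 *                   get++;
 *           }
 *   }
 *
 * Here "put" stands for nvgpu_atomic_read(&g->channel_worker.put); the real
 * loop is gk20a_channel_poll_worker() below.
 */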
1699 | |||
1700 | /** | ||
1701 | * Process the queued work items for the worker thread serially. | ||
1702 | * | ||
1703 | * Flush all the work items in the queue one by one. This may block timeout | ||
1704 | * handling for a short while, as these are serialized. | ||
1705 | */ | ||
1706 | static void gk20a_channel_worker_process(struct gk20a *g, int *get) | ||
1707 | { | ||
1708 | |||
1709 | while (__gk20a_channel_worker_pending(g, *get)) { | ||
1710 | struct channel_gk20a *ch = NULL; | ||
1711 | |||
1712 | /* | ||
1713 | * If a channel is on the list, it's guaranteed to be handled | ||
1714 | * eventually exactly once. However, the opposite is not true: a | ||
1715 | * channel may be being processed whether or not it is on the list. | ||
1716 | * | ||
1717 | * With this, channel processing should be conservative | ||
1718 | * as follows: it's always safe to look at a channel found in | ||
1719 | * the list, and if someone enqueues the channel, it will be | ||
1720 | * handled eventually, even if it's being handled at the same | ||
1721 | * time. A channel is on the list only once; multiple calls to | ||
1722 | * enqueue are harmless. | ||
1723 | */ | ||
1724 | nvgpu_spinlock_acquire(&g->channel_worker.items_lock); | ||
1725 | if (!nvgpu_list_empty(&g->channel_worker.items)) { | ||
1726 | ch = nvgpu_list_first_entry(&g->channel_worker.items, | ||
1727 | channel_gk20a, | ||
1728 | worker_item); | ||
1729 | nvgpu_list_del(&ch->worker_item); | ||
1730 | } | ||
1731 | nvgpu_spinlock_release(&g->channel_worker.items_lock); | ||
1732 | |||
1733 | if (!ch) { | ||
1734 | /* | ||
1735 | * Woke up for some other reason, but currently there is | ||
1736 | * no reason other than a channel being added to the items | ||
1737 | * list, so warn and ack the message. | ||
1738 | */ | ||
1739 | nvgpu_warn(g, "Spurious worker event!"); | ||
1740 | ++*get; | ||
1741 | break; | ||
1742 | } | ||
1743 | |||
1744 | gk20a_channel_worker_process_ch(ch); | ||
1745 | ++*get; | ||
1746 | } | ||
1747 | } | ||
1748 | |||
1749 | /* | ||
1750 | * Look at channel states periodically, until canceled. Abort timed-out | ||
1751 | * channels serially. Process all work items found in the queue. | ||
1752 | */ | ||
1753 | static int gk20a_channel_poll_worker(void *arg) | ||
1754 | { | ||
1755 | struct gk20a *g = (struct gk20a *)arg; | ||
1756 | struct gk20a_channel_worker *worker = &g->channel_worker; | ||
1757 | unsigned long watchdog_interval = 100; /* milliseconds */ | ||
1758 | struct nvgpu_timeout timeout; | ||
1759 | int get = 0; | ||
1760 | |||
1761 | gk20a_dbg_fn(""); | ||
1762 | |||
1763 | nvgpu_timeout_init(g, &timeout, watchdog_interval, | ||
1764 | NVGPU_TIMER_CPU_TIMER); | ||
1765 | while (!nvgpu_thread_should_stop(&worker->poll_task)) { | ||
1766 | int ret; | ||
1767 | |||
1768 | ret = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
1769 | &worker->wq, | ||
1770 | __gk20a_channel_worker_pending(g, get), | ||
1771 | watchdog_interval) > 0; | ||
1772 | |||
1773 | if (ret == 0) | ||
1774 | gk20a_channel_worker_process(g, &get); | ||
1775 | |||
1776 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
1777 | gk20a_channel_poll_timeouts(g); | ||
1778 | nvgpu_timeout_init(g, &timeout, watchdog_interval, | ||
1779 | NVGPU_TIMER_CPU_TIMER); | ||
1780 | } | ||
1781 | } | ||
1782 | return 0; | ||
1783 | } | ||
1784 | |||
1785 | static int __nvgpu_channel_worker_start(struct gk20a *g) | ||
1786 | { | ||
1787 | char thread_name[64]; | ||
1788 | int err = 0; | ||
1789 | |||
1790 | if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) | ||
1791 | return err; | ||
1792 | |||
1793 | nvgpu_mutex_acquire(&g->channel_worker.start_lock); | ||
1794 | |||
1795 | /* | ||
1796 | * We don't want to grab a mutex on every channel update, so we check | ||
1797 | * again whether the worker has been initialized before creating a new thread. | ||
1798 | */ | ||
1799 | |||
1800 | /* | ||
1801 | * Mutexes have implicit barriers, so there is no risk of a thread | ||
1802 | * having a stale copy of the poll_task variable as the call to | ||
1803 | * thread_is_running is volatile. | ||
1804 | */ | ||
1805 | |||
1806 | if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) { | ||
1807 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1808 | return err; | ||
1809 | } | ||
1810 | |||
1811 | snprintf(thread_name, sizeof(thread_name), | ||
1812 | "nvgpu_channel_poll_%s", g->name); | ||
1813 | |||
1814 | err = nvgpu_thread_create(&g->channel_worker.poll_task, g, | ||
1815 | gk20a_channel_poll_worker, thread_name); | ||
1816 | |||
1817 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1818 | return err; | ||
1819 | } | ||
1820 | /** | ||
1821 | * Initialize the channel worker's metadata and start the background thread. | ||
1822 | */ | ||
1823 | int nvgpu_channel_worker_init(struct gk20a *g) | ||
1824 | { | ||
1825 | int err; | ||
1826 | |||
1827 | nvgpu_atomic_set(&g->channel_worker.put, 0); | ||
1828 | nvgpu_cond_init(&g->channel_worker.wq); | ||
1829 | nvgpu_init_list_node(&g->channel_worker.items); | ||
1830 | nvgpu_spinlock_init(&g->channel_worker.items_lock); | ||
1831 | err = nvgpu_mutex_init(&g->channel_worker.start_lock); | ||
1832 | if (err) | ||
1833 | goto error_check; | ||
1834 | |||
1835 | err = __nvgpu_channel_worker_start(g); | ||
1836 | error_check: | ||
1837 | if (err) { | ||
1838 | nvgpu_err(g, "failed to start channel poller thread"); | ||
1839 | return err; | ||
1840 | } | ||
1841 | return 0; | ||
1842 | } | ||
1843 | |||
1844 | void nvgpu_channel_worker_deinit(struct gk20a *g) | ||
1845 | { | ||
1846 | nvgpu_mutex_acquire(&g->channel_worker.start_lock); | ||
1847 | nvgpu_thread_stop(&g->channel_worker.poll_task); | ||
1848 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1849 | } | ||
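/*
 * Usage sketch (editor's illustration; the actual call sites live elsewhere
 * in the driver): the worker is set up once per GPU during initialization
 * and torn down on removal, e.g.
 *
 *   err = nvgpu_channel_worker_init(g);
 *   if (err)
 *           return err;
 *   ...
 *   nvgpu_channel_worker_deinit(g);
 *
 * Work is then fed to it via gk20a_channel_worker_enqueue(), normally
 * through gk20a_channel_update().
 */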
1850 | |||
1851 | /** | ||
1852 | * Append a channel to the worker's list, if not there already. | ||
1853 | * | ||
1854 | * The worker thread processes work items (channels in its work list) and | ||
1855 | * periodically polls channel timeouts. This adds @ch to the end of the list | ||
1856 | * and wakes the worker up immediately. If the channel already exists in the | ||
1857 | * list, it's not added again: it has been scheduled but has not yet been | ||
1858 | * processed. | ||
1859 | */ | ||
1860 | static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch) | ||
1861 | { | ||
1862 | struct gk20a *g = ch->g; | ||
1863 | |||
1864 | gk20a_dbg_fn(""); | ||
1865 | |||
1866 | /* | ||
1867 | * Warn if worker thread cannot run | ||
1868 | */ | ||
1869 | if (WARN_ON(__nvgpu_channel_worker_start(g))) { | ||
1870 | nvgpu_warn(g, "channel worker cannot run!"); | ||
1871 | return; | ||
1872 | } | ||
1873 | |||
1874 | /* | ||
1875 | * Ref released when this item gets processed. The caller should hold | ||
1876 | * one ref already, so normally shouldn't fail, but the channel could | ||
1877 | * end up being freed between the time the caller got its reference and | ||
1878 | * the time we end up here (e.g., if the client got killed); if so, just | ||
1879 | * return. | ||
1880 | */ | ||
1881 | if (!gk20a_channel_get(ch)) { | ||
1882 | nvgpu_info(g, "cannot get ch ref for worker!"); | ||
1883 | return; | ||
1884 | } | ||
1885 | |||
1886 | nvgpu_spinlock_acquire(&g->channel_worker.items_lock); | ||
1887 | if (!nvgpu_list_empty(&ch->worker_item)) { | ||
1888 | /* | ||
1889 | * Already queued, so will get processed eventually. | ||
1890 | * The worker is probably awake already. | ||
1891 | */ | ||
1892 | nvgpu_spinlock_release(&g->channel_worker.items_lock); | ||
1893 | gk20a_channel_put(ch); | ||
1894 | return; | ||
1895 | } | ||
1896 | nvgpu_list_add_tail(&ch->worker_item, &g->channel_worker.items); | ||
1897 | nvgpu_spinlock_release(&g->channel_worker.items_lock); | ||
1898 | |||
1899 | __gk20a_channel_worker_wakeup(g); | ||
1900 | } | ||
1901 | |||
1902 | int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) | ||
1903 | { | ||
1904 | struct priv_cmd_queue *q = &c->priv_cmd_q; | ||
1905 | struct gk20a *g = c->g; | ||
1906 | |||
1907 | if (!e) | ||
1908 | return 0; | ||
1909 | |||
1910 | if (e->valid) { | ||
1911 | /* read the entry's valid flag before reading its contents */ | ||
1912 | nvgpu_smp_rmb(); | ||
1913 | if ((q->get != e->off) && e->off != 0) | ||
1914 | nvgpu_err(g, "requests out-of-order, ch=%d", | ||
1915 | c->chid); | ||
1916 | q->get = e->off + e->size; | ||
1917 | } | ||
1918 | |||
1919 | free_priv_cmdbuf(c, e); | ||
1920 | |||
1921 | return 0; | ||
1922 | } | ||
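/*
 * Editor's note on the check above (a reading of the code, not new logic):
 * priv cmd entries are expected to be freed in allocation order, so q->get
 * can simply jump to e->off + e->size. The "requests out-of-order" warning
 * fires when that assumption is violated, except for entries that wrapped
 * around to offset 0.
 */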
1923 | |||
1924 | int gk20a_channel_add_job(struct channel_gk20a *c, | ||
1925 | struct channel_gk20a_job *job, | ||
1926 | bool skip_buffer_refcounting) | ||
1927 | { | ||
1928 | struct vm_gk20a *vm = c->vm; | ||
1929 | struct nvgpu_mapped_buf **mapped_buffers = NULL; | ||
1930 | int err = 0, num_mapped_buffers = 0; | ||
1931 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
1932 | |||
1933 | if (!skip_buffer_refcounting) { | ||
1934 | err = nvgpu_vm_get_buffers(vm, &mapped_buffers, | ||
1935 | &num_mapped_buffers); | ||
1936 | if (err) | ||
1937 | return err; | ||
1938 | } | ||
1939 | |||
1940 | /* | ||
1941 | * Ref to hold the channel open during the job lifetime. This is | ||
1942 | * released by job cleanup launched via syncpt or sema interrupt. | ||
1943 | */ | ||
1944 | c = gk20a_channel_get(c); | ||
1945 | |||
1946 | if (c) { | ||
1947 | job->num_mapped_buffers = num_mapped_buffers; | ||
1948 | job->mapped_buffers = mapped_buffers; | ||
1949 | |||
1950 | gk20a_channel_timeout_start(c); | ||
1951 | |||
1952 | if (!pre_alloc_enabled) | ||
1953 | channel_gk20a_joblist_lock(c); | ||
1954 | |||
1955 | /* | ||
1956 | * ensure all pending writes complete before adding to the list. | ||
1957 | * see corresponding nvgpu_smp_rmb in | ||
1958 | * gk20a_channel_clean_up_jobs() & | ||
1959 | * gk20a_channel_abort_clean_up() | ||
1960 | */ | ||
1961 | nvgpu_smp_wmb(); | ||
1962 | channel_gk20a_joblist_add(c, job); | ||
1963 | |||
1964 | if (!pre_alloc_enabled) | ||
1965 | channel_gk20a_joblist_unlock(c); | ||
1966 | } else { | ||
1967 | err = -ETIMEDOUT; | ||
1968 | goto err_put_buffers; | ||
1969 | } | ||
1970 | |||
1971 | return 0; | ||
1972 | |||
1973 | err_put_buffers: | ||
1974 | nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); | ||
1975 | |||
1976 | return err; | ||
1977 | } | ||
1978 | |||
1979 | /** | ||
1980 | * Clean up job resources for further jobs to use. | ||
1981 | * @clean_all: If true, process as many jobs as possible, otherwise just one. | ||
1982 | * | ||
1983 | * Loop over all jobs in the joblist until a pending job is found, or just one if | ||
1984 | * clean_all is not set. Pending jobs are detected from the job's post fence, | ||
1985 | * so this is only done for jobs that have job tracking resources. Free all | ||
1986 | * per-job memory for completed jobs; in case of preallocated resources, this | ||
1987 | * opens up slots for new jobs to be submitted. | ||
1988 | */ | ||
1989 | void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | ||
1990 | bool clean_all) | ||
1991 | { | ||
1992 | struct vm_gk20a *vm; | ||
1993 | struct channel_gk20a_job *job; | ||
1994 | struct gk20a *g; | ||
1995 | int job_finished = 0; | ||
1996 | bool watchdog_on = false; | ||
1997 | |||
1998 | c = gk20a_channel_get(c); | ||
1999 | if (!c) | ||
2000 | return; | ||
2001 | |||
2002 | if (!c->g->power_on) { /* shutdown case */ | ||
2003 | gk20a_channel_put(c); | ||
2004 | return; | ||
2005 | } | ||
2006 | |||
2007 | vm = c->vm; | ||
2008 | g = c->g; | ||
2009 | |||
2010 | /* | ||
2011 | * If !clean_all, we're in a condition where watchdog isn't supported | ||
2012 | * anyway (this would be a no-op). | ||
2013 | */ | ||
2014 | if (clean_all) | ||
2015 | watchdog_on = gk20a_channel_timeout_stop(c); | ||
2016 | |||
2017 | /* Synchronize with abort cleanup that needs the jobs. */ | ||
2018 | nvgpu_mutex_acquire(&c->joblist.cleanup_lock); | ||
2019 | |||
2020 | while (1) { | ||
2021 | bool completed; | ||
2022 | |||
2023 | channel_gk20a_joblist_lock(c); | ||
2024 | if (channel_gk20a_joblist_is_empty(c)) { | ||
2025 | /* | ||
2026 | * No jobs in flight, timeout will remain stopped until | ||
2027 | * new jobs are submitted. | ||
2028 | */ | ||
2029 | channel_gk20a_joblist_unlock(c); | ||
2030 | break; | ||
2031 | } | ||
2032 | |||
2033 | /* | ||
2034 | * ensure that all subsequent reads occur after checking | ||
2035 | * that we have a valid node. see corresponding nvgpu_smp_wmb in | ||
2036 | * gk20a_channel_add_job(). | ||
2037 | */ | ||
2038 | nvgpu_smp_rmb(); | ||
2039 | job = channel_gk20a_joblist_peek(c); | ||
2040 | channel_gk20a_joblist_unlock(c); | ||
2041 | |||
2042 | completed = gk20a_fence_is_expired(job->post_fence); | ||
2043 | if (!completed) { | ||
2044 | /* | ||
2045 | * The watchdog eventually sees an updated gp_get if | ||
2046 | * something happened in this loop. A new job can have | ||
2047 | * been submitted between the above call to stop and | ||
2048 | * this - in that case, this is a no-op and the new | ||
2049 | * later timeout is still used. | ||
2050 | */ | ||
2051 | if (clean_all && watchdog_on) | ||
2052 | gk20a_channel_timeout_continue(c); | ||
2053 | break; | ||
2054 | } | ||
2055 | |||
2056 | WARN_ON(!c->sync); | ||
2057 | |||
2058 | if (c->sync) { | ||
2059 | c->sync->signal_timeline(c->sync); | ||
2060 | |||
2061 | if (g->aggressive_sync_destroy_thresh) { | ||
2062 | nvgpu_mutex_acquire(&c->sync_lock); | ||
2063 | if (nvgpu_atomic_dec_and_test( | ||
2064 | &c->sync->refcount) && | ||
2065 | g->aggressive_sync_destroy) { | ||
2066 | gk20a_channel_sync_destroy(c->sync); | ||
2067 | c->sync = NULL; | ||
2068 | } | ||
2069 | nvgpu_mutex_release(&c->sync_lock); | ||
2070 | } | ||
2071 | } | ||
2072 | |||
2073 | if (job->num_mapped_buffers) | ||
2074 | nvgpu_vm_put_buffers(vm, job->mapped_buffers, | ||
2075 | job->num_mapped_buffers); | ||
2076 | |||
2077 | /* Remove job from channel's job list before we close the | ||
2078 | * fences, to prevent other callers (gk20a_channel_abort) from | ||
2079 | * trying to dereference post_fence when it no longer exists. | ||
2080 | */ | ||
2081 | channel_gk20a_joblist_lock(c); | ||
2082 | channel_gk20a_joblist_delete(c, job); | ||
2083 | channel_gk20a_joblist_unlock(c); | ||
2084 | |||
2085 | /* Close the fences (this will unref the semaphores and release | ||
2086 | * them to the pool). */ | ||
2087 | gk20a_fence_put(job->pre_fence); | ||
2088 | gk20a_fence_put(job->post_fence); | ||
2089 | |||
2090 | /* Free the private command buffers (wait_cmd first and | ||
2091 | * then incr_cmd i.e. order of allocation) */ | ||
2092 | gk20a_free_priv_cmdbuf(c, job->wait_cmd); | ||
2093 | gk20a_free_priv_cmdbuf(c, job->incr_cmd); | ||
2094 | |||
2095 | /* another bookkeeping ref taken in add_job. The caller must hold a | ||
2096 | * ref, so the channel won't get freed here. */ | ||
2097 | gk20a_channel_put(c); | ||
2098 | |||
2099 | /* | ||
2100 | * ensure all pending writes complete before freeing up the job. | ||
2101 | * see corresponding nvgpu_smp_rmb in channel_gk20a_alloc_job(). | ||
2102 | */ | ||
2103 | nvgpu_smp_wmb(); | ||
2104 | |||
2105 | channel_gk20a_free_job(c, job); | ||
2106 | job_finished = 1; | ||
2107 | |||
2108 | /* | ||
2109 | * Deterministic channels have a channel-wide power reference; | ||
2110 | * for others, there's one per submit. | ||
2111 | */ | ||
2112 | if (!c->deterministic) | ||
2113 | gk20a_idle(g); | ||
2114 | |||
2115 | if (!clean_all) { | ||
2116 | /* Timeout isn't supported here so don't touch it. */ | ||
2117 | break; | ||
2118 | } | ||
2119 | } | ||
2120 | |||
2121 | nvgpu_mutex_release(&c->joblist.cleanup_lock); | ||
2122 | |||
2123 | if (job_finished && c->update_fn) | ||
2124 | schedule_work(&c->update_fn_work); | ||
2125 | |||
2126 | gk20a_channel_put(c); | ||
2127 | } | ||
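/*
 * Editor's summary of the per-job teardown order implemented in the loop
 * above (descriptive only, no additional driver logic):
 *
 *   1. signal the sync timeline (and possibly destroy c->sync aggressively)
 *   2. nvgpu_vm_put_buffers()          - buffer refs taken in add_job
 *   3. delete the job from the joblist - so abort paths stop seeing it
 *   4. gk20a_fence_put(pre/post)       - release semaphores to the pool
 *   5. gk20a_free_priv_cmdbuf()        - wait_cmd, then incr_cmd
 *   6. gk20a_channel_put()             - ref taken in add_job
 *   7. channel_gk20a_free_job()
 *   8. gk20a_idle()                    - unless the channel is deterministic
 */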
2128 | |||
2129 | /** | ||
2130 | * Schedule a job cleanup work on this channel to free resources and to signal | ||
2131 | * about completion. | ||
2132 | * | ||
2133 | * Call this when there has been an interrupt about finished jobs, or when job | ||
2134 | * cleanup needs to be performed, e.g., when closing a channel. This is always | ||
2135 | * safe to call even if there is nothing to clean up. Any visible actions on | ||
2136 | * jobs just before calling this are guaranteed to be processed. | ||
2137 | */ | ||
2138 | void gk20a_channel_update(struct channel_gk20a *c) | ||
2139 | { | ||
2140 | if (!c->g->power_on) { /* shutdown case */ | ||
2141 | return; | ||
2142 | } | ||
2143 | |||
2144 | trace_gk20a_channel_update(c->chid); | ||
2145 | /* A queued channel is always checked for job cleanup. */ | ||
2146 | gk20a_channel_worker_enqueue(c); | ||
2147 | } | ||
2148 | |||
2149 | /* | ||
2150 | * Stop deterministic channel activity for do_idle() when power needs to go off | ||
2151 | * momentarily but deterministic channels keep power refs for potentially a | ||
2152 | * long time. | ||
2153 | * | ||
2154 | * Takes write access on g->deterministic_busy. | ||
2155 | * | ||
2156 | * Must be paired with gk20a_channel_deterministic_unidle(). | ||
2157 | */ | ||
2158 | void gk20a_channel_deterministic_idle(struct gk20a *g) | ||
2159 | { | ||
2160 | struct fifo_gk20a *f = &g->fifo; | ||
2161 | u32 chid; | ||
2162 | |||
2163 | /* Grab exclusive access to the hw to block new submits */ | ||
2164 | nvgpu_rwsem_down_write(&g->deterministic_busy); | ||
2165 | |||
2166 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2167 | struct channel_gk20a *ch = &f->channel[chid]; | ||
2168 | |||
2169 | if (!gk20a_channel_get(ch)) | ||
2170 | continue; | ||
2171 | |||
2172 | if (ch->deterministic) { | ||
2173 | /* | ||
2174 | * Drop the power ref taken when setting deterministic | ||
2175 | * flag. deterministic_unidle will put this and the | ||
2176 | * channel ref back. | ||
2177 | * | ||
2178 | * Hold the channel ref: it must not get freed in | ||
2179 | * between. A race could otherwise result in lost | ||
2180 | * gk20a_busy() via unidle, and in unbalanced | ||
2181 | * gk20a_idle() via closing the channel. | ||
2182 | */ | ||
2183 | gk20a_idle(g); | ||
2184 | } else { | ||
2185 | /* Not interesting, carry on. */ | ||
2186 | gk20a_channel_put(ch); | ||
2187 | } | ||
2188 | } | ||
2189 | } | ||
2190 | |||
2191 | /* | ||
2192 | * Allow deterministic channel activity again for do_unidle(). | ||
2193 | * | ||
2194 | * This releases write access on g->deterministic_busy. | ||
2195 | */ | ||
2196 | void gk20a_channel_deterministic_unidle(struct gk20a *g) | ||
2197 | { | ||
2198 | struct fifo_gk20a *f = &g->fifo; | ||
2199 | u32 chid; | ||
2200 | |||
2201 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2202 | struct channel_gk20a *ch = &f->channel[chid]; | ||
2203 | |||
2204 | if (!gk20a_channel_get(ch)) | ||
2205 | continue; | ||
2206 | |||
2207 | /* | ||
2208 | * Deterministic state changes inside deterministic_busy lock, | ||
2209 | * which we took in deterministic_idle. | ||
2210 | */ | ||
2211 | if (ch->deterministic) { | ||
2212 | if (gk20a_busy(g)) | ||
2213 | nvgpu_err(g, "cannot busy() again!"); | ||
2214 | /* Took this in idle() */ | ||
2215 | gk20a_channel_put(ch); | ||
2216 | } | ||
2217 | |||
2218 | gk20a_channel_put(ch); | ||
2219 | } | ||
2220 | |||
2221 | /* Release submits, new deterministic channels and frees */ | ||
2222 | nvgpu_rwsem_up_write(&g->deterministic_busy); | ||
2223 | } | ||
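/*
 * Pairing sketch (editor's illustration; the surrounding do_idle logic lives
 * elsewhere in the driver):
 *
 *   gk20a_channel_deterministic_idle(g);    // drop per-channel power refs,
 *                                           // block new submits
 *   ...railgate / power-off work...
 *   gk20a_channel_deterministic_unidle(g);  // retake refs, allow submits
 *
 * The calls must always be paired: idle() leaves the deterministic_busy
 * rwsem held for writing and keeps an extra channel ref per deterministic
 * channel, both of which unidle() releases.
 */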
2224 | |||
2225 | int gk20a_init_channel_support(struct gk20a *g, u32 chid) | ||
2226 | { | ||
2227 | struct channel_gk20a *c = g->fifo.channel+chid; | ||
2228 | int err; | ||
2229 | |||
2230 | c->g = NULL; | ||
2231 | c->chid = chid; | ||
2232 | nvgpu_atomic_set(&c->bound, false); | ||
2233 | nvgpu_spinlock_init(&c->ref_obtain_lock); | ||
2234 | nvgpu_atomic_set(&c->ref_count, 0); | ||
2235 | c->referenceable = false; | ||
2236 | nvgpu_cond_init(&c->ref_count_dec_wq); | ||
2237 | |||
2238 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | ||
2239 | nvgpu_spinlock_init(&c->ref_actions_lock); | ||
2240 | #endif | ||
2241 | nvgpu_spinlock_init(&c->joblist.dynamic.lock); | ||
2242 | nvgpu_raw_spinlock_init(&c->timeout.lock); | ||
2243 | |||
2244 | nvgpu_init_list_node(&c->joblist.dynamic.jobs); | ||
2245 | nvgpu_init_list_node(&c->dbg_s_list); | ||
2246 | nvgpu_init_list_node(&c->event_id_list); | ||
2247 | nvgpu_init_list_node(&c->worker_item); | ||
2248 | |||
2249 | err = nvgpu_mutex_init(&c->ioctl_lock); | ||
2250 | if (err) | ||
2251 | return err; | ||
2252 | err = nvgpu_mutex_init(&c->error_notifier_mutex); | ||
2253 | if (err) | ||
2254 | goto fail_1; | ||
2255 | err = nvgpu_mutex_init(&c->joblist.cleanup_lock); | ||
2256 | if (err) | ||
2257 | goto fail_2; | ||
2258 | err = nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); | ||
2259 | if (err) | ||
2260 | goto fail_3; | ||
2261 | err = nvgpu_mutex_init(&c->sync_lock); | ||
2262 | if (err) | ||
2263 | goto fail_4; | ||
2264 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
2265 | err = nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); | ||
2266 | if (err) | ||
2267 | goto fail_5; | ||
2268 | err = nvgpu_mutex_init(&c->cs_client_mutex); | ||
2269 | if (err) | ||
2270 | goto fail_6; | ||
2271 | #endif | ||
2272 | err = nvgpu_mutex_init(&c->event_id_list_lock); | ||
2273 | if (err) | ||
2274 | goto fail_7; | ||
2275 | err = nvgpu_mutex_init(&c->dbg_s_lock); | ||
2276 | if (err) | ||
2277 | goto fail_8; | ||
2278 | |||
2279 | nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); | ||
2280 | |||
2281 | return 0; | ||
2282 | |||
2283 | fail_8: | ||
2284 | nvgpu_mutex_destroy(&c->event_id_list_lock); | ||
2285 | fail_7: | ||
2286 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
2287 | nvgpu_mutex_destroy(&c->cs_client_mutex); | ||
2288 | fail_6: | ||
2289 | nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); | ||
2290 | fail_5: | ||
2291 | #endif | ||
2292 | nvgpu_mutex_destroy(&c->sync_lock); | ||
2293 | fail_4: | ||
2294 | nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); | ||
2295 | fail_3: | ||
2296 | nvgpu_mutex_destroy(&c->joblist.cleanup_lock); | ||
2297 | fail_2: | ||
2298 | nvgpu_mutex_destroy(&c->error_notifier_mutex); | ||
2299 | fail_1: | ||
2300 | nvgpu_mutex_destroy(&c->ioctl_lock); | ||
2301 | |||
2302 | return err; | ||
2303 | } | ||
2304 | |||
2305 | /* in this context the "channel" is the host1x channel which | ||
2306 | * maps to *all* gk20a channels */ | ||
2307 | int gk20a_channel_suspend(struct gk20a *g) | ||
2308 | { | ||
2309 | struct fifo_gk20a *f = &g->fifo; | ||
2310 | u32 chid; | ||
2311 | bool channels_in_use = false; | ||
2312 | u32 active_runlist_ids = 0; | ||
2313 | |||
2314 | gk20a_dbg_fn(""); | ||
2315 | |||
2316 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2317 | struct channel_gk20a *ch = &f->channel[chid]; | ||
2318 | if (gk20a_channel_get(ch)) { | ||
2319 | gk20a_dbg_info("suspend channel %d", chid); | ||
2320 | /* disable channel */ | ||
2321 | gk20a_disable_channel_tsg(g, ch); | ||
2322 | /* preempt the channel */ | ||
2323 | gk20a_fifo_preempt(g, ch); | ||
2324 | /* wait for channel update notifiers */ | ||
2325 | if (ch->update_fn) | ||
2326 | cancel_work_sync(&ch->update_fn_work); | ||
2327 | |||
2328 | channels_in_use = true; | ||
2329 | |||
2330 | active_runlist_ids |= BIT(ch->runlist_id); | ||
2331 | |||
2332 | gk20a_channel_put(ch); | ||
2333 | } | ||
2334 | } | ||
2335 | |||
2336 | if (channels_in_use) { | ||
2337 | gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true); | ||
2338 | |||
2339 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2340 | if (gk20a_channel_get(&f->channel[chid])) { | ||
2341 | g->ops.fifo.unbind_channel(&f->channel[chid]); | ||
2342 | gk20a_channel_put(&f->channel[chid]); | ||
2343 | } | ||
2344 | } | ||
2345 | } | ||
2346 | |||
2347 | gk20a_dbg_fn("done"); | ||
2348 | return 0; | ||
2349 | } | ||
2350 | |||
2351 | int gk20a_channel_resume(struct gk20a *g) | ||
2352 | { | ||
2353 | struct fifo_gk20a *f = &g->fifo; | ||
2354 | u32 chid; | ||
2355 | bool channels_in_use = false; | ||
2356 | u32 active_runlist_ids = 0; | ||
2357 | |||
2358 | gk20a_dbg_fn(""); | ||
2359 | |||
2360 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2361 | if (gk20a_channel_get(&f->channel[chid])) { | ||
2362 | gk20a_dbg_info("resume channel %d", chid); | ||
2363 | g->ops.fifo.bind_channel(&f->channel[chid]); | ||
2364 | channels_in_use = true; | ||
2365 | active_runlist_ids |= BIT(f->channel[chid].runlist_id); | ||
2366 | gk20a_channel_put(&f->channel[chid]); | ||
2367 | } | ||
2368 | } | ||
2369 | |||
2370 | if (channels_in_use) | ||
2371 | gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, true, true); | ||
2372 | |||
2373 | gk20a_dbg_fn("done"); | ||
2374 | return 0; | ||
2375 | } | ||
2376 | |||
2377 | void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) | ||
2378 | { | ||
2379 | struct fifo_gk20a *f = &g->fifo; | ||
2380 | u32 chid; | ||
2381 | |||
2382 | gk20a_dbg_fn(""); | ||
2383 | |||
2384 | /* | ||
2385 | * Ensure that all pending writes are actually done before trying to | ||
2386 | * read semaphore values from DRAM. | ||
2387 | */ | ||
2388 | g->ops.mm.fb_flush(g); | ||
2389 | |||
2390 | for (chid = 0; chid < f->num_channels; chid++) { | ||
2391 | struct channel_gk20a *c = g->fifo.channel+chid; | ||
2392 | if (gk20a_channel_get(c)) { | ||
2393 | if (nvgpu_atomic_read(&c->bound)) { | ||
2394 | nvgpu_cond_broadcast_interruptible( | ||
2395 | &c->semaphore_wq); | ||
2396 | if (post_events) { | ||
2397 | if (gk20a_is_channel_marked_as_tsg(c)) { | ||
2398 | struct tsg_gk20a *tsg = | ||
2399 | &g->fifo.tsg[c->tsgid]; | ||
2400 | |||
2401 | gk20a_tsg_event_id_post_event(tsg, | ||
2402 | NVGPU_EVENT_ID_BLOCKING_SYNC); | ||
2403 | } else { | ||
2404 | gk20a_channel_event_id_post_event(c, | ||
2405 | NVGPU_EVENT_ID_BLOCKING_SYNC); | ||
2406 | } | ||
2407 | } | ||
2408 | /* | ||
2409 | * Only non-deterministic channels get the | ||
2410 | * channel_update callback. We don't allow | ||
2411 | * semaphore-backed syncs for these channels | ||
2412 | * anyways, since they have a dependency on | ||
2413 | * the sync framework. | ||
2414 | * If deterministic channels are receiving a | ||
2415 | * semaphore wakeup, it must be for a | ||
2416 | * user-space managed | ||
2417 | * semaphore. | ||
2418 | */ | ||
2419 | if (!c->deterministic) | ||
2420 | gk20a_channel_update(c); | ||
2421 | } | ||
2422 | gk20a_channel_put(c); | ||
2423 | } | ||
2424 | } | ||
2425 | } | ||